mark all block comments that need format preserving so that
[openssl.git] / crypto / aes / aes_core.c
1 /* crypto/aes/aes_core.c -*- mode:C; c-file-style: "eay" -*- */
2 /**
3  * rijndael-alg-fst.c
4  *
5  * @version 3.0 (December 2000)
6  *
7  * Optimised ANSI C code for the Rijndael cipher (now AES)
8  *
9  * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
10  * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
11  * @author Paulo Barreto <paulo.barreto@terra.com.br>
12  *
13  * This code is hereby placed in the public domain.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
19  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27
28 /* Note: rewritten a little bit to provide error control and an OpenSSL-
29    compatible API */
30
31 #ifndef AES_DEBUG
32 # ifndef NDEBUG
33 #  define NDEBUG
34 # endif
35 #endif
36 #include <assert.h>
37
38 #include <stdlib.h>
39 #include <openssl/crypto.h>
40 #include <openssl/aes.h>
41 #include "aes_locl.h"
42
43 #ifndef AES_ASM
44 /*-
45 Te0[x] = S [x].[02, 01, 01, 03];
46 Te1[x] = S [x].[03, 02, 01, 01];
47 Te2[x] = S [x].[01, 03, 02, 01];
48 Te3[x] = S [x].[01, 01, 03, 02];
49
50 Td0[x] = Si[x].[0e, 09, 0d, 0b];
51 Td1[x] = Si[x].[0b, 0e, 09, 0d];
52 Td2[x] = Si[x].[0d, 0b, 0e, 09];
53 Td3[x] = Si[x].[09, 0d, 0b, 0e];
54 Td4[x] = Si[x].[01];
55 */
56
/*
 * Te0: 256-entry lookup table of 32-bit words, indexed by one byte x.
 *
 * Per the table summary comment earlier in this file,
 * Te0[x] = S[x].[02, 01, 01, 03]: reading each word from its most
 * significant byte down, the bytes are 02*S[x], S[x], S[x] and 03*S[x],
 * with the products taken in GF(2^8).  Example: the first entry is
 * 0xc66363a5, i.e. S[0] = 0x63, 02*0x63 = 0xc6, 03*0x63 = 0xa5.
 *
 * S presumably denotes the Rijndael S-box and the "e" the encryption
 * direction; the code that reads this table is outside this view.
 * u32 is declared elsewhere (presumably in aes_locl.h).
 * This table is only compiled when AES_ASM is not defined.
 */
57 static const u32 Te0[256] = {
58     0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
59     0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
60     0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
61     0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
62     0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
63     0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
64     0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
65     0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
66     0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
67     0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
68     0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
69     0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
70     0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
71     0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
72     0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
73     0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
74     0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
75     0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
76     0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
77     0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
78     0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
79     0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
80     0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
81     0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
82     0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
83     0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
84     0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
85     0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
86     0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
87     0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
88     0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
89     0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
90     0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
91     0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
92     0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
93     0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
94     0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
95     0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
96     0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
97     0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
98     0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
99     0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
100     0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
101     0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
102     0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
103     0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
104     0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
105     0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
106     0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
107     0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
108     0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
109     0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
110     0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
111     0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
112     0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
113     0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
114     0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
115     0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
116     0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
117     0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
118     0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
119     0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
120     0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
121     0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
122 };
/*
 * Te1: 256-entry lookup table of 32-bit words, indexed by one byte x.
 *
 * Per the table summary comment earlier in this file,
 * Te1[x] = S[x].[03, 02, 01, 01]: from the most significant byte down,
 * each word holds 03*S[x], 02*S[x], S[x] and S[x] (products in GF(2^8)).
 * That is the same four bytes as S[x].[02, 01, 01, 03] rotated right by
 * 8 bits; e.g. the first entry here is 0xa5c66363 for S[0] = 0x63.
 *
 * S presumably denotes the Rijndael S-box; the code that reads this
 * table is outside this view.  u32 is declared elsewhere (presumably in
 * aes_locl.h).  Only compiled when AES_ASM is not defined.
 */
123 static const u32 Te1[256] = {
124     0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
125     0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
126     0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
127     0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
128     0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
129     0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
130     0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
131     0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
132     0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
133     0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
134     0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
135     0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
136     0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
137     0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
138     0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
139     0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
140     0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
141     0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
142     0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
143     0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
144     0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
145     0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
146     0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
147     0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
148     0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
149     0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
150     0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
151     0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
152     0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
153     0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
154     0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
155     0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
156     0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
157     0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
158     0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
159     0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
160     0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
161     0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
162     0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
163     0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
164     0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
165     0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
166     0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
167     0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
168     0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
169     0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
170     0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
171     0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
172     0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
173     0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
174     0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
175     0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
176     0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
177     0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
178     0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
179     0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
180     0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
181     0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
182     0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
183     0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
184     0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
185     0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
186     0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
187     0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
188 };
/*
 * Te2: 256-entry lookup table of 32-bit words, indexed by one byte x.
 *
 * Per the table summary comment earlier in this file,
 * Te2[x] = S[x].[01, 03, 02, 01]: from the most significant byte down,
 * each word holds S[x], 03*S[x], 02*S[x] and S[x] (products in GF(2^8)).
 * That is the same four bytes as S[x].[02, 01, 01, 03] rotated right by
 * 16 bits; e.g. the first entry here is 0x63a5c663 for S[0] = 0x63.
 *
 * S presumably denotes the Rijndael S-box; the code that reads this
 * table is outside this view.  u32 is declared elsewhere (presumably in
 * aes_locl.h).  Only compiled when AES_ASM is not defined.
 */
189 static const u32 Te2[256] = {
190     0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
191     0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
192     0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
193     0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
194     0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
195     0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
196     0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
197     0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
198     0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
199     0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
200     0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
201     0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
202     0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
203     0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
204     0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
205     0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
206     0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
207     0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
208     0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
209     0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
210     0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
211     0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
212     0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
213     0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
214     0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
215     0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
216     0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
217     0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
218     0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
219     0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
220     0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
221     0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
222     0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
223     0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
224     0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
225     0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
226     0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
227     0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
228     0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
229     0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
230     0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
231     0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
232     0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
233     0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
234     0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
235     0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
236     0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
237     0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
238     0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
239     0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
240     0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
241     0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
242     0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
243     0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
244     0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
245     0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
246     0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
247     0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
248     0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
249     0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
250     0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
251     0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
252     0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
253     0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
254 };
/*
 * Te3: 256-entry lookup table of 32-bit words, indexed by one byte x.
 *
 * Per the table summary comment earlier in this file,
 * Te3[x] = S[x].[01, 01, 03, 02]: from the most significant byte down,
 * each word holds S[x], S[x], 03*S[x] and 02*S[x] (products in GF(2^8)).
 * That is the same four bytes as S[x].[02, 01, 01, 03] rotated right by
 * 24 bits; e.g. the first entry here is 0x6363a5c6 for S[0] = 0x63.
 *
 * S presumably denotes the Rijndael S-box; the code that reads this
 * table is outside this view.  u32 is declared elsewhere (presumably in
 * aes_locl.h).  Only compiled when AES_ASM is not defined.
 */
255 static const u32 Te3[256] = {
256     0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
257     0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
258     0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
259     0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
260     0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
261     0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
262     0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
263     0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
264     0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
265     0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
266     0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
267     0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
268     0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
269     0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
270     0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
271     0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
272     0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
273     0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
274     0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
275     0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
276     0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
277     0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
278     0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
279     0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
280     0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
281     0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
282     0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
283     0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
284     0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
285     0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
286     0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
287     0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
288     0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
289     0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
290     0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
291     0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
292     0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
293     0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
294     0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
295     0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
296     0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
297     0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
298     0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
299     0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
300     0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
301     0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
302     0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
303     0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
304     0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
305     0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
306     0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
307     0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
308     0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
309     0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
310     0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
311     0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
312     0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
313     0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
314     0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
315     0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
316     0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
317     0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
318     0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
319     0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
320 };
321
/*
 * Td0: 256-entry lookup table of 32-bit words, indexed by one byte x.
 *
 * Per the table summary comment earlier in this file,
 * Td0[x] = Si[x].[0e, 09, 0d, 0b]: from the most significant byte down,
 * each word holds 0e*Si[x], 09*Si[x], 0d*Si[x] and 0b*Si[x], with the
 * products taken in GF(2^8).  Example: the first entry is 0x51f4a750,
 * i.e. Si[0] = 0x52, 0e*0x52 = 0x51, 09*0x52 = 0xf4, 0d*0x52 = 0xa7,
 * 0b*0x52 = 0x50.
 *
 * Si presumably denotes the inverse Rijndael S-box and the "d" the
 * decryption direction; the code that reads this table is outside this
 * view.  u32 is declared elsewhere (presumably in aes_locl.h).
 * This table is only compiled when AES_ASM is not defined.
 */
322 static const u32 Td0[256] = {
323     0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
324     0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
325     0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
326     0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
327     0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
328     0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
329     0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
330     0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
331     0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
332     0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
333     0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
334     0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
335     0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
336     0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
337     0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
338     0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
339     0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
340     0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
341     0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
342     0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
343     0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
344     0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
345     0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
346     0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
347     0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
348     0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
349     0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
350     0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
351     0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
352     0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
353     0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
354     0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
355     0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
356     0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
357     0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
358     0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
359     0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
360     0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
361     0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
362     0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
363     0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
364     0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
365     0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
366     0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
367     0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
368     0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
369     0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
370     0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
371     0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
372     0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
373     0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
374     0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
375     0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
376     0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
377     0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
378     0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
379     0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
380     0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
381     0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
382     0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
383     0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
384     0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
385     0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
386     0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
387 };
/*
 * Td1: 256-entry lookup table of 32-bit words, indexed by one byte x.
 *
 * Per the table summary comment earlier in this file,
 * Td1[x] = Si[x].[0b, 0e, 09, 0d]: from the most significant byte down,
 * each word holds 0b*Si[x], 0e*Si[x], 09*Si[x] and 0d*Si[x] (products in
 * GF(2^8)).  That is the same four bytes as Si[x].[0e, 09, 0d, 0b]
 * rotated right by 8 bits; e.g. the first entry here is 0x5051f4a7 for
 * Si[0] = 0x52.
 *
 * Si presumably denotes the inverse Rijndael S-box; the code that reads
 * this table is outside this view.  u32 is declared elsewhere
 * (presumably in aes_locl.h).  Only compiled when AES_ASM is not defined.
 */
388 static const u32 Td1[256] = {
389     0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
390     0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
391     0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
392     0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
393     0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
394     0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
395     0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
396     0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
397     0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
398     0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
399     0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
400     0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
401     0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
402     0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
403     0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
404     0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
405     0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
406     0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
407     0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
408     0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
409     0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
410     0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
411     0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
412     0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
413     0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
414     0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
415     0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
416     0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
417     0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
418     0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
419     0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
420     0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
421     0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
422     0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
423     0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
424     0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
425     0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
426     0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
427     0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
428     0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
429     0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
430     0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
431     0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
432     0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
433     0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
434     0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
435     0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
436     0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
437     0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
438     0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
439     0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
440     0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
441     0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
442     0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
443     0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
444     0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
445     0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
446     0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
447     0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
448     0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
449     0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
450     0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
451     0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
452     0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
453 };
/*
 * Td2: 256-entry lookup table of 32-bit words, indexed by one byte x.
 *
 * Per the table summary comment earlier in this file,
 * Td2[x] = Si[x].[0d, 0b, 0e, 09]: from the most significant byte down,
 * each word holds 0d*Si[x], 0b*Si[x], 0e*Si[x] and 09*Si[x] (products in
 * GF(2^8)).  That is the same four bytes as Si[x].[0e, 09, 0d, 0b]
 * rotated right by 16 bits; e.g. the first entry here is 0xa75051f4 for
 * Si[0] = 0x52.
 *
 * Si presumably denotes the inverse Rijndael S-box; the code that reads
 * this table is outside this view.  u32 is declared elsewhere
 * (presumably in aes_locl.h).  Only compiled when AES_ASM is not defined.
 */
454 static const u32 Td2[256] = {
455     0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
456     0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
457     0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
458     0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
459     0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
460     0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
461     0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
462     0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
463     0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
464     0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
465     0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
466     0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
467     0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
468     0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
469     0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
470     0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
471     0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
472     0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
473     0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
474     0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
475     0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
476     0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
477     0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
478     0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
479     0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
480     0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
481     0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
482     0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
483     0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
484     0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
485     0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
486     0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
487     0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
488     0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
489     0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
490     0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
491     0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
492     0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
493     0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
494     0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
495     0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
496     0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
497     0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
498     0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
499     0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
500     0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
501     0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
502     0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
503     0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
504     0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
505     0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
506     0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
507     0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
508     0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
509     0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
510     0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
511     0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
512     0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
513     0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
514     0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
515     0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
516     0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
517     0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
518     0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
519 };
/*
 * Td3: 256-entry lookup table of 32-bit words, indexed by one byte x.
 *
 * Per the table summary comment earlier in this file,
 * Td3[x] = Si[x].[09, 0d, 0b, 0e]: from the most significant byte down,
 * each word holds 09*Si[x], 0d*Si[x], 0b*Si[x] and 0e*Si[x] (products in
 * GF(2^8)).  That is the same four bytes as Si[x].[0e, 09, 0d, 0b]
 * rotated right by 24 bits; e.g. the first entry here is 0xf4a75051 for
 * Si[0] = 0x52.
 *
 * Si presumably denotes the inverse Rijndael S-box; the code that reads
 * this table is outside this view.  u32 is declared elsewhere
 * (presumably in aes_locl.h).  Only compiled when AES_ASM is not defined.
 */
520 static const u32 Td3[256] = {
521     0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
522     0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
523     0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
524     0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
525     0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
526     0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
527     0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
528     0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
529     0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
530     0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
531     0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
532     0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
533     0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
534     0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
535     0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
536     0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
537     0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
538     0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
539     0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
540     0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
541     0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
542     0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
543     0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
544     0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
545     0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
546     0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
547     0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
548     0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
549     0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
550     0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
551     0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
552     0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
553     0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
554     0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
555     0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
556     0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
557     0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
558     0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
559     0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
560     0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
561     0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
562     0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
563     0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
564     0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
565     0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
566     0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
567     0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
568     0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
569     0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
570     0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
571     0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
572     0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
573     0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
574     0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
575     0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
576     0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
577     0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
578     0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
579     0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
580     0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
581     0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
582     0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
583     0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
584     0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
585 };
586 static const u8 Td4[256] = {
587     0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
588     0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
589     0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
590     0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
591     0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
592     0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
593     0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
594     0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
595     0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
596     0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
597     0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
598     0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
599     0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
600     0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
601     0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
602     0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
603     0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
604     0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
605     0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
606     0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
607     0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
608     0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
609     0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
610     0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
611     0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
612     0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
613     0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
614     0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
615     0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
616     0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
617     0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
618     0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
619 };
620 static const u32 rcon[] = {
621         0x01000000, 0x02000000, 0x04000000, 0x08000000,
622         0x10000000, 0x20000000, 0x40000000, 0x80000000,
623         0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
624 };
625
626 /**
627  * Expand the cipher key into the encryption key schedule.
628  */
629 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
630                         AES_KEY *key) {
631
632         u32 *rk;
633         int i = 0;
634         u32 temp;
635
636         if (!userKey || !key)
637                 return -1;
638         if (bits != 128 && bits != 192 && bits != 256)
639                 return -2;
640
641         rk = key->rd_key;
642
643         if (bits==128)
644                 key->rounds = 10;
645         else if (bits==192)
646                 key->rounds = 12;
647         else
648                 key->rounds = 14;
649
650         rk[0] = GETU32(userKey     );
651         rk[1] = GETU32(userKey +  4);
652         rk[2] = GETU32(userKey +  8);
653         rk[3] = GETU32(userKey + 12);
654         if (bits == 128) {
655                 while (1) {
656                         temp  = rk[3];
657                         rk[4] = rk[0] ^
658                                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
659                                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
660                                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
661                                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
662                                 rcon[i];
663                         rk[5] = rk[1] ^ rk[4];
664                         rk[6] = rk[2] ^ rk[5];
665                         rk[7] = rk[3] ^ rk[6];
666                         if (++i == 10) {
667                                 return 0;
668                         }
669                         rk += 4;
670                 }
671         }
672         rk[4] = GETU32(userKey + 16);
673         rk[5] = GETU32(userKey + 20);
674         if (bits == 192) {
675                 while (1) {
676                         temp = rk[ 5];
677                         rk[ 6] = rk[ 0] ^
678                                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
679                                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
680                                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
681                                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
682                                 rcon[i];
683                         rk[ 7] = rk[ 1] ^ rk[ 6];
684                         rk[ 8] = rk[ 2] ^ rk[ 7];
685                         rk[ 9] = rk[ 3] ^ rk[ 8];
686                         if (++i == 8) {
687                                 return 0;
688                         }
689                         rk[10] = rk[ 4] ^ rk[ 9];
690                         rk[11] = rk[ 5] ^ rk[10];
691                         rk += 6;
692                 }
693         }
694         rk[6] = GETU32(userKey + 24);
695         rk[7] = GETU32(userKey + 28);
696         if (bits == 256) {
697                 while (1) {
698                         temp = rk[ 7];
699                         rk[ 8] = rk[ 0] ^
700                                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
701                                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
702                                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
703                                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
704                                 rcon[i];
705                         rk[ 9] = rk[ 1] ^ rk[ 8];
706                         rk[10] = rk[ 2] ^ rk[ 9];
707                         rk[11] = rk[ 3] ^ rk[10];
708                         if (++i == 7) {
709                                 return 0;
710                         }
711                         temp = rk[11];
712                         rk[12] = rk[ 4] ^
713                                 (Te2[(temp >> 24)       ] & 0xff000000) ^
714                                 (Te3[(temp >> 16) & 0xff] & 0x00ff0000) ^
715                                 (Te0[(temp >>  8) & 0xff] & 0x0000ff00) ^
716                                 (Te1[(temp      ) & 0xff] & 0x000000ff);
717                         rk[13] = rk[ 5] ^ rk[12];
718                         rk[14] = rk[ 6] ^ rk[13];
719                         rk[15] = rk[ 7] ^ rk[14];
720
721                         rk += 8;
722                 }
723         }
724         return 0;
725 }
726
727 /**
728  * Expand the cipher key into the decryption key schedule.
729  */
730 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
731                          AES_KEY *key) {
732
733         u32 *rk;
734         int i, j, status;
735         u32 temp;
736
737         /* first, start with an encryption schedule */
738         status = AES_set_encrypt_key(userKey, bits, key);
739         if (status < 0)
740                 return status;
741
742         rk = key->rd_key;
743
744         /* invert the order of the round keys: */
745         for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
746                 temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
747                 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
748                 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
749                 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
750         }
751         /* apply the inverse MixColumn transform to all round keys but the first and the last: */
752         for (i = 1; i < (key->rounds); i++) {
753                 rk += 4;
754                 rk[0] =
755                         Td0[Te1[(rk[0] >> 24)       ] & 0xff] ^
756                         Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
757                         Td2[Te1[(rk[0] >>  8) & 0xff] & 0xff] ^
758                         Td3[Te1[(rk[0]      ) & 0xff] & 0xff];
759                 rk[1] =
760                         Td0[Te1[(rk[1] >> 24)       ] & 0xff] ^
761                         Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
762                         Td2[Te1[(rk[1] >>  8) & 0xff] & 0xff] ^
763                         Td3[Te1[(rk[1]      ) & 0xff] & 0xff];
764                 rk[2] =
765                         Td0[Te1[(rk[2] >> 24)       ] & 0xff] ^
766                         Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
767                         Td2[Te1[(rk[2] >>  8) & 0xff] & 0xff] ^
768                         Td3[Te1[(rk[2]      ) & 0xff] & 0xff];
769                 rk[3] =
770                         Td0[Te1[(rk[3] >> 24)       ] & 0xff] ^
771                         Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
772                         Td2[Te1[(rk[3] >>  8) & 0xff] & 0xff] ^
773                         Td3[Te1[(rk[3]      ) & 0xff] & 0xff];
774         }
775         return 0;
776 }
777
778 /*
779  * Encrypt a single block
780  * in and out can overlap
781  */
782 void AES_encrypt(const unsigned char *in, unsigned char *out,
783                  const AES_KEY *key) {
784
785         const u32 *rk;
786         u32 s0, s1, s2, s3, t0, t1, t2, t3;
787 #ifndef FULL_UNROLL
788         int r;
789 #endif /* ?FULL_UNROLL */
790
791         assert(in && out && key);
792         rk = key->rd_key;
793
794         /*
795          * map byte array block to cipher state
796          * and add initial round key:
797          */
798         s0 = GETU32(in     ) ^ rk[0];
799         s1 = GETU32(in +  4) ^ rk[1];
800         s2 = GETU32(in +  8) ^ rk[2];
801         s3 = GETU32(in + 12) ^ rk[3];
802 #ifdef FULL_UNROLL
803         /* round 1: */
804         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
805         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
806         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
807         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
808         /* round 2: */
809         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
810         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
811         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
812         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
813         /* round 3: */
814         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
815         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
816         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
817         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
818         /* round 4: */
819         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
820         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
821         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
822         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
823         /* round 5: */
824         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
825         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
826         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
827         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
828         /* round 6: */
829         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
830         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
831         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
832         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
833         /* round 7: */
834         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
835         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
836         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
837         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
838         /* round 8: */
839         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
840         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
841         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
842         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
843         /* round 9: */
844         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
845         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
846         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
847         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
848     if (key->rounds > 10) {
849         /* round 10: */
850         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
851         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
852         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
853         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
854         /* round 11: */
855         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
856         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
857         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
858         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
859         if (key->rounds > 12) {
860             /* round 12: */
861             s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
862             s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
863             s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
864             s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
865             /* round 13: */
866             t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
867             t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
868             t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
869             t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
870         }
871     }
872     rk += key->rounds << 2;
873 #else  /* !FULL_UNROLL */
874     /*
875      * Nr - 1 full rounds:
876      */
877     r = key->rounds >> 1;
878     for (;;) {
879         t0 =
880             Te0[(s0 >> 24)       ] ^
881             Te1[(s1 >> 16) & 0xff] ^
882             Te2[(s2 >>  8) & 0xff] ^
883             Te3[(s3      ) & 0xff] ^
884             rk[4];
885         t1 =
886             Te0[(s1 >> 24)       ] ^
887             Te1[(s2 >> 16) & 0xff] ^
888             Te2[(s3 >>  8) & 0xff] ^
889             Te3[(s0      ) & 0xff] ^
890             rk[5];
891         t2 =
892             Te0[(s2 >> 24)       ] ^
893             Te1[(s3 >> 16) & 0xff] ^
894             Te2[(s0 >>  8) & 0xff] ^
895             Te3[(s1      ) & 0xff] ^
896             rk[6];
897         t3 =
898             Te0[(s3 >> 24)       ] ^
899             Te1[(s0 >> 16) & 0xff] ^
900             Te2[(s1 >>  8) & 0xff] ^
901             Te3[(s2      ) & 0xff] ^
902             rk[7];
903
904         rk += 8;
905         if (--r == 0) {
906             break;
907         }
908
909         s0 =
910             Te0[(t0 >> 24)       ] ^
911             Te1[(t1 >> 16) & 0xff] ^
912             Te2[(t2 >>  8) & 0xff] ^
913             Te3[(t3      ) & 0xff] ^
914             rk[0];
915         s1 =
916             Te0[(t1 >> 24)       ] ^
917             Te1[(t2 >> 16) & 0xff] ^
918             Te2[(t3 >>  8) & 0xff] ^
919             Te3[(t0      ) & 0xff] ^
920             rk[1];
921         s2 =
922             Te0[(t2 >> 24)       ] ^
923             Te1[(t3 >> 16) & 0xff] ^
924             Te2[(t0 >>  8) & 0xff] ^
925             Te3[(t1      ) & 0xff] ^
926             rk[2];
927         s3 =
928             Te0[(t3 >> 24)       ] ^
929             Te1[(t0 >> 16) & 0xff] ^
930             Te2[(t1 >>  8) & 0xff] ^
931             Te3[(t2      ) & 0xff] ^
932             rk[3];
933     }
934 #endif /* ?FULL_UNROLL */
935     /*
936          * apply last round and
937          * map cipher state to byte array block:
938          */
939         s0 =
940                 (Te2[(t0 >> 24)       ] & 0xff000000) ^
941                 (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^
942                 (Te0[(t2 >>  8) & 0xff] & 0x0000ff00) ^
943                 (Te1[(t3      ) & 0xff] & 0x000000ff) ^
944                 rk[0];
945         PUTU32(out     , s0);
946         s1 =
947                 (Te2[(t1 >> 24)       ] & 0xff000000) ^
948                 (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^
949                 (Te0[(t3 >>  8) & 0xff] & 0x0000ff00) ^
950                 (Te1[(t0      ) & 0xff] & 0x000000ff) ^
951                 rk[1];
952         PUTU32(out +  4, s1);
953         s2 =
954                 (Te2[(t2 >> 24)       ] & 0xff000000) ^
955                 (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^
956                 (Te0[(t0 >>  8) & 0xff] & 0x0000ff00) ^
957                 (Te1[(t1      ) & 0xff] & 0x000000ff) ^
958                 rk[2];
959         PUTU32(out +  8, s2);
960         s3 =
961                 (Te2[(t3 >> 24)       ] & 0xff000000) ^
962                 (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^
963                 (Te0[(t1 >>  8) & 0xff] & 0x0000ff00) ^
964                 (Te1[(t2      ) & 0xff] & 0x000000ff) ^
965                 rk[3];
966         PUTU32(out + 12, s3);
967 }
968
969 /*
970  * Decrypt a single block
971  * in and out can overlap
972  */
973 void AES_decrypt(const unsigned char *in, unsigned char *out,
974                  const AES_KEY *key) {
975
976         const u32 *rk;
977         u32 s0, s1, s2, s3, t0, t1, t2, t3;
978 #ifndef FULL_UNROLL
979         int r;
980 #endif /* ?FULL_UNROLL */
981
982         assert(in && out && key);
983         rk = key->rd_key;
984
985         /*
986          * map byte array block to cipher state
987          * and add initial round key:
988          */
989     s0 = GETU32(in     ) ^ rk[0];
990     s1 = GETU32(in +  4) ^ rk[1];
991     s2 = GETU32(in +  8) ^ rk[2];
992     s3 = GETU32(in + 12) ^ rk[3];
993 #ifdef FULL_UNROLL
994     /* round 1: */
995     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
996     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
997     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
998     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
999     /* round 2: */
1000     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
1001     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
1002     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
1003     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
1004     /* round 3: */
1005     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
1006     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
1007     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
1008     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
1009     /* round 4: */
1010     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
1011     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
1012     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
1013     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
1014     /* round 5: */
1015     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
1016     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
1017     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
1018     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
1019     /* round 6: */
1020     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
1021     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
1022     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
1023     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
1024     /* round 7: */
1025     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
1026     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
1027     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
1028     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
1029     /* round 8: */
1030     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
1031     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
1032     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
1033     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
1034     /* round 9: */
1035     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
1036     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
1037     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
1038     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
1039     if (key->rounds > 10) {
1040         /* round 10: */
1041         s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
1042         s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
1043         s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
1044         s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
1045         /* round 11: */
1046         t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
1047         t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
1048         t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
1049         t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
1050         if (key->rounds > 12) {
1051             /* round 12: */
1052             s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
1053             s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
1054             s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
1055             s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
1056             /* round 13: */
1057             t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
1058             t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
1059             t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
1060             t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
1061         }
1062     }
1063         rk += key->rounds << 2;
1064 #else  /* !FULL_UNROLL */
1065     /*
1066      * Nr - 1 full rounds:
1067      */
1068     r = key->rounds >> 1;
1069     for (;;) {
1070         t0 =
1071             Td0[(s0 >> 24)       ] ^
1072             Td1[(s3 >> 16) & 0xff] ^
1073             Td2[(s2 >>  8) & 0xff] ^
1074             Td3[(s1      ) & 0xff] ^
1075             rk[4];
1076         t1 =
1077             Td0[(s1 >> 24)       ] ^
1078             Td1[(s0 >> 16) & 0xff] ^
1079             Td2[(s3 >>  8) & 0xff] ^
1080             Td3[(s2      ) & 0xff] ^
1081             rk[5];
1082         t2 =
1083             Td0[(s2 >> 24)       ] ^
1084             Td1[(s1 >> 16) & 0xff] ^
1085             Td2[(s0 >>  8) & 0xff] ^
1086             Td3[(s3      ) & 0xff] ^
1087             rk[6];
1088         t3 =
1089             Td0[(s3 >> 24)       ] ^
1090             Td1[(s2 >> 16) & 0xff] ^
1091             Td2[(s1 >>  8) & 0xff] ^
1092             Td3[(s0      ) & 0xff] ^
1093             rk[7];
1094
1095         rk += 8;
1096         if (--r == 0) {
1097             break;
1098         }
1099
1100         s0 =
1101             Td0[(t0 >> 24)       ] ^
1102             Td1[(t3 >> 16) & 0xff] ^
1103             Td2[(t2 >>  8) & 0xff] ^
1104             Td3[(t1      ) & 0xff] ^
1105             rk[0];
1106         s1 =
1107             Td0[(t1 >> 24)       ] ^
1108             Td1[(t0 >> 16) & 0xff] ^
1109             Td2[(t3 >>  8) & 0xff] ^
1110             Td3[(t2      ) & 0xff] ^
1111             rk[1];
1112         s2 =
1113             Td0[(t2 >> 24)       ] ^
1114             Td1[(t1 >> 16) & 0xff] ^
1115             Td2[(t0 >>  8) & 0xff] ^
1116             Td3[(t3      ) & 0xff] ^
1117             rk[2];
1118         s3 =
1119             Td0[(t3 >> 24)       ] ^
1120             Td1[(t2 >> 16) & 0xff] ^
1121             Td2[(t1 >>  8) & 0xff] ^
1122             Td3[(t0      ) & 0xff] ^
1123             rk[3];
1124     }
1125 #endif /* ?FULL_UNROLL */
1126     /*
1127          * apply last round and
1128          * map cipher state to byte array block:
1129          */
1130         s0 =
1131                 (Td4[(t0 >> 24)       ] << 24) ^
1132                 (Td4[(t3 >> 16) & 0xff] << 16) ^
1133                 (Td4[(t2 >>  8) & 0xff] <<  8) ^
1134                 (Td4[(t1      ) & 0xff])       ^
1135                 rk[0];
1136         PUTU32(out     , s0);
1137         s1 =
1138                 (Td4[(t1 >> 24)       ] << 24) ^
1139                 (Td4[(t0 >> 16) & 0xff] << 16) ^
1140                 (Td4[(t3 >>  8) & 0xff] <<  8) ^
1141                 (Td4[(t2      ) & 0xff])       ^
1142                 rk[1];
1143         PUTU32(out +  4, s1);
1144         s2 =
1145                 (Td4[(t2 >> 24)       ] << 24) ^
1146                 (Td4[(t1 >> 16) & 0xff] << 16) ^
1147                 (Td4[(t0 >>  8) & 0xff] <<  8) ^
1148                 (Td4[(t3      ) & 0xff])       ^
1149                 rk[2];
1150         PUTU32(out +  8, s2);
1151         s3 =
1152                 (Td4[(t3 >> 24)       ] << 24) ^
1153                 (Td4[(t2 >> 16) & 0xff] << 16) ^
1154                 (Td4[(t1 >>  8) & 0xff] <<  8) ^
1155                 (Td4[(t0      ) & 0xff])       ^
1156                 rk[3];
1157         PUTU32(out + 12, s3);
1158 }
1159
1160 #else /* AES_ASM */
1161
1162 static const u8 Te4[256] = {
1163     0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
1164     0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
1165     0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
1166     0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
1167     0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
1168     0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
1169     0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
1170     0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
1171     0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
1172     0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
1173     0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
1174     0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
1175     0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
1176     0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
1177     0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
1178     0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
1179     0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
1180     0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
1181     0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
1182     0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
1183     0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
1184     0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
1185     0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
1186     0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
1187     0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
1188     0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
1189     0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
1190     0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
1191     0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
1192     0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
1193     0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
1194     0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
1195 };
1196 static const u32 rcon[] = {
1197         0x01000000, 0x02000000, 0x04000000, 0x08000000,
1198         0x10000000, 0x20000000, 0x40000000, 0x80000000,
1199         0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
1200 };
1201
1202 /**
1203  * Expand the cipher key into the encryption key schedule.
1204  */
1205 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
1206                         AES_KEY *key) {
1207         u32 *rk;
1208         int i = 0;
1209         u32 temp;
1210
1211         if (!userKey || !key)
1212                 return -1;
1213         if (bits != 128 && bits != 192 && bits != 256)
1214                 return -2;
1215
1216         rk = key->rd_key;
1217
1218         if (bits==128)
1219                 key->rounds = 10;
1220         else if (bits==192)
1221                 key->rounds = 12;
1222         else
1223                 key->rounds = 14;
1224
1225         rk[0] = GETU32(userKey     );
1226         rk[1] = GETU32(userKey +  4);
1227         rk[2] = GETU32(userKey +  8);
1228         rk[3] = GETU32(userKey + 12);
1229         if (bits == 128) {
1230                 while (1) {
1231                         temp  = rk[3];
1232                         rk[4] = rk[0] ^
1233                                 (Te4[(temp >> 16) & 0xff] << 24) ^
1234                                 (Te4[(temp >>  8) & 0xff] << 16) ^
1235                                 (Te4[(temp      ) & 0xff] << 8) ^
1236                                 (Te4[(temp >> 24)       ]) ^
1237                                 rcon[i];
1238                         rk[5] = rk[1] ^ rk[4];
1239                         rk[6] = rk[2] ^ rk[5];
1240                         rk[7] = rk[3] ^ rk[6];
1241                         if (++i == 10) {
1242                                 return 0;
1243                         }
1244                         rk += 4;
1245                 }
1246         }
1247         rk[4] = GETU32(userKey + 16);
1248         rk[5] = GETU32(userKey + 20);
1249         if (bits == 192) {
1250                 while (1) {
1251                         temp = rk[ 5];
1252                         rk[ 6] = rk[ 0] ^
1253                                 (Te4[(temp >> 16) & 0xff] << 24) ^
1254                                 (Te4[(temp >>  8) & 0xff] << 16) ^
1255                                 (Te4[(temp      ) & 0xff] << 8) ^
1256                                 (Te4[(temp >> 24)       ]) ^
1257                                 rcon[i];
1258                         rk[ 7] = rk[ 1] ^ rk[ 6];
1259                         rk[ 8] = rk[ 2] ^ rk[ 7];
1260                         rk[ 9] = rk[ 3] ^ rk[ 8];
1261                         if (++i == 8) {
1262                                 return 0;
1263                         }
1264                         rk[10] = rk[ 4] ^ rk[ 9];
1265                         rk[11] = rk[ 5] ^ rk[10];
1266                         rk += 6;
1267                 }
1268         }
1269         rk[6] = GETU32(userKey + 24);
1270         rk[7] = GETU32(userKey + 28);
1271         if (bits == 256) {
1272                 while (1) {
1273                         temp = rk[ 7];
1274                         rk[ 8] = rk[ 0] ^
1275                                 (Te4[(temp >> 16) & 0xff] << 24) ^
1276                                 (Te4[(temp >>  8) & 0xff] << 16) ^
1277                                 (Te4[(temp      ) & 0xff] << 8) ^
1278                                 (Te4[(temp >> 24)       ]) ^
1279                                 rcon[i];
1280                         rk[ 9] = rk[ 1] ^ rk[ 8];
1281                         rk[10] = rk[ 2] ^ rk[ 9];
1282                         rk[11] = rk[ 3] ^ rk[10];
1283                         if (++i == 7) {
1284                                 return 0;
1285                         }
1286                         temp = rk[11];
1287                         rk[12] = rk[ 4] ^
1288                                 (Te4[(temp >> 24)       ] << 24) ^
1289                                 (Te4[(temp >> 16) & 0xff] << 16) ^
1290                                 (Te4[(temp >>  8) & 0xff] << 8) ^
1291                                 (Te4[(temp      ) & 0xff]);
1292                         rk[13] = rk[ 5] ^ rk[12];
1293                         rk[14] = rk[ 6] ^ rk[13];
1294                         rk[15] = rk[ 7] ^ rk[14];
1295
1296                         rk += 8;
1297                 }
1298         }
1299         return 0;
1300 }
1301
1302 /**
1303  * Expand the cipher key into the decryption key schedule.
1304  */
1305 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
1306                          AES_KEY *key) {
1307
1308         u32 *rk;
1309         int i, j, status;
1310         u32 temp;
1311
1312         /* first, start with an encryption schedule */
1313         status = AES_set_encrypt_key(userKey, bits, key);
1314         if (status < 0)
1315                 return status;
1316
1317         rk = key->rd_key;
1318
1319         /* invert the order of the round keys: */
1320         for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
1321                 temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
1322                 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
1323                 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
1324                 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
1325         }
1326         /* apply the inverse MixColumn transform to all round keys but the first and the last: */
1327         for (i = 1; i < (key->rounds); i++) {
1328                 rk += 4;
1329                 for (j = 0; j < 4; j++) {
1330                         u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
1331
1332                         tp1 = rk[j];
1333                         m = tp1 & 0x80808080;
1334                         tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
1335                                 ((m - (m >> 7)) & 0x1b1b1b1b);
1336                         m = tp2 & 0x80808080;
1337                         tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
1338                                 ((m - (m >> 7)) & 0x1b1b1b1b);
1339                         m = tp4 & 0x80808080;
1340                         tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
1341                                 ((m - (m >> 7)) & 0x1b1b1b1b);
1342                         tp9 = tp8 ^ tp1;
1343                         tpb = tp9 ^ tp2;
1344                         tpd = tp9 ^ tp4;
1345                         tpe = tp8 ^ tp4 ^ tp2;
1346 #if defined(ROTATE)
1347                         rk[j] = tpe ^ ROTATE(tpd,16) ^
1348                                 ROTATE(tp9,24) ^ ROTATE(tpb,8);
1349 #else
1350                         rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ 
1351                                 (tp9 >> 8) ^ (tp9 << 24) ^
1352                                 (tpb >> 24) ^ (tpb << 8);
1353 #endif
1354                 }
1355         }
1356         return 0;
1357 }
1358
1359 #endif /* AES_ASM */