Non-executable stack in asm.
[openssl.git] / crypto / aes / aes_core.c
index 1e155a31917c8afa7da5be34a261c57c62c2c0cf..a7ec54f4dade256fd6744e8d84bf0c7ca481119e 100644 (file)
 #include <openssl/aes.h>
 #include "aes_locl.h"
 
+#ifndef AES_ASM
 /*
 Te0[x] = S [x].[02, 01, 01, 03];
 Te1[x] = S [x].[03, 02, 01, 01];
 Te2[x] = S [x].[01, 03, 02, 01];
 Te3[x] = S [x].[01, 01, 03, 02];
-Te4[x] = S [x].[01, 01, 01, 01];
 
 Td0[x] = Si[x].[0e, 09, 0d, 0b];
 Td1[x] = Si[x].[0b, 0e, 09, 0d];
 Td2[x] = Si[x].[0d, 0b, 0e, 09];
 Td3[x] = Si[x].[09, 0d, 0b, 0e];
-Td4[x] = Si[x].[01, 01, 01, 01];
+Td4[x] = Si[x].[01];
 */
 
-#ifdef AES_ASM
-extern const u32 AES_Te[5][256];
-#define Te0 AES_Te[0]
-#define Te1 AES_Te[1]
-#define Te2 AES_Te[2]
-#define Te3 AES_Te[3]
-#define Te4 AES_Te[4]
-extern const u32 AES_Td[5][256];
-#define Td0 AES_Td[0]
-#define Td1 AES_Td[1]
-#define Td2 AES_Td[2]
-#define Td3 AES_Td[3]
-#define Td4 AES_Td[4]
-#else
 static const u32 Te0[256] = {
     0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
     0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
@@ -266,7 +252,6 @@ static const u32 Te2[256] = {
     0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
 };
 static const u32 Te3[256] = {
-
     0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
     0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
     0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
@@ -332,72 +317,7 @@ static const u32 Te3[256] = {
     0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
     0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
 };
-static const u32 Te4[256] = {
-    0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
-    0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
-    0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
-    0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
-    0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
-    0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
-    0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
-    0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
-    0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
-    0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
-    0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
-    0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
-    0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
-    0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
-    0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
-    0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
-    0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
-    0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
-    0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
-    0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
-    0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
-    0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
-    0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
-    0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
-    0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
-    0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
-    0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
-    0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
-    0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
-    0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
-    0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
-    0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
-    0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
-    0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
-    0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
-    0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
-    0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
-    0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
-    0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
-    0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
-    0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
-    0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
-    0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
-    0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
-    0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
-    0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
-    0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
-    0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
-    0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
-    0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
-    0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
-    0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
-    0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
-    0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
-    0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
-    0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
-    0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
-    0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
-    0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
-    0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
-    0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
-    0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
-    0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
-    0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
-};
+
 static const u32 Td0[256] = {
     0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
     0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
@@ -551,7 +471,6 @@ static const u32 Td2[256] = {
     0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
     0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
     0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
-
     0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
     0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
     0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
@@ -663,73 +582,40 @@ static const u32 Td3[256] = {
     0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
     0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
 };
-static const u32 Td4[256] = {
-    0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
-    0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
-    0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
-    0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
-    0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U,
-    0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
-    0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U,
-    0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
-    0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U,
-    0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
-    0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU,
-    0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
-    0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U,
-    0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
-    0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U,
-    0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
-    0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U,
-    0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
-    0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU,
-    0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
-    0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U,
-    0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
-    0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U,
-    0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
-    0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U,
-    0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
-    0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U,
-    0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
-    0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU,
-    0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
-    0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U,
-    0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
-    0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
-    0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
-    0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
-    0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
-    0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
-    0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
-    0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
-    0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
-    0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
-    0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
-    0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
-    0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
-    0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
-    0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
-    0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
-    0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
-    0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
-    0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
-    0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
-    0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
-    0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
-    0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
-    0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
-    0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
-    0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
-    0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
-    0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
-    0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
-    0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
-    0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
-    0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
-    0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
+static const u8 Td4[256] = {
+    0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
+    0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
+    0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
+    0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
+    0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
+    0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
+    0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
+    0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
+    0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
+    0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
+    0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
+    0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
+    0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
+    0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
+    0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
+    0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
+    0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
+    0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
+    0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
+    0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
+    0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
+    0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
+    0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
+    0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
+    0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
+    0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
+    0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
+    0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
+    0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
+    0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
+    0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
+    0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
 };
-#endif
 static const u32 rcon[] = {
        0x01000000, 0x02000000, 0x04000000, 0x08000000,
        0x10000000, 0x20000000, 0x40000000, 0x80000000,
@@ -768,10 +654,10 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
                while (1) {
                        temp  = rk[3];
                        rk[4] = rk[0] ^
-                               (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
-                               (Te4[(temp >>  8) & 0xff] & 0x00ff0000) ^
-                               (Te4[(temp      ) & 0xff] & 0x0000ff00) ^
-                               (Te4[(temp >> 24)       ] & 0x000000ff) ^
+                               (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
+                               (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
+                               (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
+                               (Te1[(temp >> 24)       ] & 0x000000ff) ^
                                rcon[i];
                        rk[5] = rk[1] ^ rk[4];
                        rk[6] = rk[2] ^ rk[5];
@@ -788,10 +674,10 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
                while (1) {
                        temp = rk[ 5];
                        rk[ 6] = rk[ 0] ^
-                               (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
-                               (Te4[(temp >>  8) & 0xff] & 0x00ff0000) ^
-                               (Te4[(temp      ) & 0xff] & 0x0000ff00) ^
-                               (Te4[(temp >> 24)       ] & 0x000000ff) ^
+                               (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
+                               (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
+                               (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
+                               (Te1[(temp >> 24)       ] & 0x000000ff) ^
                                rcon[i];
                        rk[ 7] = rk[ 1] ^ rk[ 6];
                        rk[ 8] = rk[ 2] ^ rk[ 7];
@@ -810,10 +696,10 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
                while (1) {
                        temp = rk[ 7];
                        rk[ 8] = rk[ 0] ^
-                               (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
-                               (Te4[(temp >>  8) & 0xff] & 0x00ff0000) ^
-                               (Te4[(temp      ) & 0xff] & 0x0000ff00) ^
-                               (Te4[(temp >> 24)       ] & 0x000000ff) ^
+                               (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
+                               (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
+                               (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
+                               (Te1[(temp >> 24)       ] & 0x000000ff) ^
                                rcon[i];
                        rk[ 9] = rk[ 1] ^ rk[ 8];
                        rk[10] = rk[ 2] ^ rk[ 9];
@@ -823,10 +709,10 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
                        }
                        temp = rk[11];
                        rk[12] = rk[ 4] ^
-                               (Te4[(temp >> 24)       ] & 0xff000000) ^
-                               (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
-                               (Te4[(temp >>  8) & 0xff] & 0x0000ff00) ^
-                               (Te4[(temp      ) & 0xff] & 0x000000ff);
+                               (Te2[(temp >> 24)       ] & 0xff000000) ^
+                               (Te3[(temp >> 16) & 0xff] & 0x00ff0000) ^
+                               (Te0[(temp >>  8) & 0xff] & 0x0000ff00) ^
+                               (Te1[(temp      ) & 0xff] & 0x000000ff);
                        rk[13] = rk[ 5] ^ rk[12];
                        rk[14] = rk[ 6] ^ rk[13];
                        rk[15] = rk[ 7] ^ rk[14];
@@ -865,30 +751,29 @@ int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
        for (i = 1; i < (key->rounds); i++) {
                rk += 4;
                rk[0] =
-                       Td0[Te4[(rk[0] >> 24)       ] & 0xff] ^
-                       Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
-                       Td2[Te4[(rk[0] >>  8) & 0xff] & 0xff] ^
-                       Td3[Te4[(rk[0]      ) & 0xff] & 0xff];
+                       Td0[Te1[(rk[0] >> 24)       ] & 0xff] ^
+                       Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
+                       Td2[Te1[(rk[0] >>  8) & 0xff] & 0xff] ^
+                       Td3[Te1[(rk[0]      ) & 0xff] & 0xff];
                rk[1] =
-                       Td0[Te4[(rk[1] >> 24)       ] & 0xff] ^
-                       Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
-                       Td2[Te4[(rk[1] >>  8) & 0xff] & 0xff] ^
-                       Td3[Te4[(rk[1]      ) & 0xff] & 0xff];
+                       Td0[Te1[(rk[1] >> 24)       ] & 0xff] ^
+                       Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
+                       Td2[Te1[(rk[1] >>  8) & 0xff] & 0xff] ^
+                       Td3[Te1[(rk[1]      ) & 0xff] & 0xff];
                rk[2] =
-                       Td0[Te4[(rk[2] >> 24)       ] & 0xff] ^
-                       Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
-                       Td2[Te4[(rk[2] >>  8) & 0xff] & 0xff] ^
-                       Td3[Te4[(rk[2]      ) & 0xff] & 0xff];
+                       Td0[Te1[(rk[2] >> 24)       ] & 0xff] ^
+                       Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
+                       Td2[Te1[(rk[2] >>  8) & 0xff] & 0xff] ^
+                       Td3[Te1[(rk[2]      ) & 0xff] & 0xff];
                rk[3] =
-                       Td0[Te4[(rk[3] >> 24)       ] & 0xff] ^
-                       Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
-                       Td2[Te4[(rk[3] >>  8) & 0xff] & 0xff] ^
-                       Td3[Te4[(rk[3]      ) & 0xff] & 0xff];
+                       Td0[Te1[(rk[3] >> 24)       ] & 0xff] ^
+                       Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
+                       Td2[Te1[(rk[3] >>  8) & 0xff] & 0xff] ^
+                       Td3[Te1[(rk[3]      ) & 0xff] & 0xff];
        }
        return 0;
 }
 
-#ifndef AES_ASM
 /*
  * Encrypt a single block
  * in and out can overlap
@@ -1051,31 +936,31 @@ void AES_encrypt(const unsigned char *in, unsigned char *out,
         * map cipher state to byte array block:
         */
        s0 =
-               (Te4[(t0 >> 24)       ] & 0xff000000) ^
-               (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
-               (Te4[(t2 >>  8) & 0xff] & 0x0000ff00) ^
-               (Te4[(t3      ) & 0xff] & 0x000000ff) ^
+               (Te2[(t0 >> 24)       ] & 0xff000000) ^
+               (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+               (Te0[(t2 >>  8) & 0xff] & 0x0000ff00) ^
+               (Te1[(t3      ) & 0xff] & 0x000000ff) ^
                rk[0];
        PUTU32(out     , s0);
        s1 =
-               (Te4[(t1 >> 24)       ] & 0xff000000) ^
-               (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
-               (Te4[(t3 >>  8) & 0xff] & 0x0000ff00) ^
-               (Te4[(t0      ) & 0xff] & 0x000000ff) ^
+               (Te2[(t1 >> 24)       ] & 0xff000000) ^
+               (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+               (Te0[(t3 >>  8) & 0xff] & 0x0000ff00) ^
+               (Te1[(t0      ) & 0xff] & 0x000000ff) ^
                rk[1];
        PUTU32(out +  4, s1);
        s2 =
-               (Te4[(t2 >> 24)       ] & 0xff000000) ^
-               (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
-               (Te4[(t0 >>  8) & 0xff] & 0x0000ff00) ^
-               (Te4[(t1      ) & 0xff] & 0x000000ff) ^
+               (Te2[(t2 >> 24)       ] & 0xff000000) ^
+               (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+               (Te0[(t0 >>  8) & 0xff] & 0x0000ff00) ^
+               (Te1[(t1      ) & 0xff] & 0x000000ff) ^
                rk[2];
        PUTU32(out +  8, s2);
        s3 =
-               (Te4[(t3 >> 24)       ] & 0xff000000) ^
-               (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
-               (Te4[(t1 >>  8) & 0xff] & 0x0000ff00) ^
-               (Te4[(t2      ) & 0xff] & 0x000000ff) ^
+               (Te2[(t3 >> 24)       ] & 0xff000000) ^
+               (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+               (Te0[(t1 >>  8) & 0xff] & 0x0000ff00) ^
+               (Te1[(t2      ) & 0xff] & 0x000000ff) ^
                rk[3];
        PUTU32(out + 12, s3);
 }
@@ -1242,33 +1127,232 @@ void AES_decrypt(const unsigned char *in, unsigned char *out,
         * map cipher state to byte array block:
         */
        s0 =
-               (Td4[(t0 >> 24)       ] & 0xff000000) ^
-               (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
-               (Td4[(t2 >>  8) & 0xff] & 0x0000ff00) ^
-               (Td4[(t1      ) & 0xff] & 0x000000ff) ^
+               (Td4[(t0 >> 24)       ] << 24) ^
+               (Td4[(t3 >> 16) & 0xff] << 16) ^
+               (Td4[(t2 >>  8) & 0xff] <<  8) ^
+               (Td4[(t1      ) & 0xff])       ^
                rk[0];
        PUTU32(out     , s0);
        s1 =
-               (Td4[(t1 >> 24)       ] & 0xff000000) ^
-               (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
-               (Td4[(t3 >>  8) & 0xff] & 0x0000ff00) ^
-               (Td4[(t2      ) & 0xff] & 0x000000ff) ^
+               (Td4[(t1 >> 24)       ] << 24) ^
+               (Td4[(t0 >> 16) & 0xff] << 16) ^
+               (Td4[(t3 >>  8) & 0xff] <<  8) ^
+               (Td4[(t2      ) & 0xff])       ^
                rk[1];
        PUTU32(out +  4, s1);
        s2 =
-               (Td4[(t2 >> 24)       ] & 0xff000000) ^
-               (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
-               (Td4[(t0 >>  8) & 0xff] & 0x0000ff00) ^
-               (Td4[(t3      ) & 0xff] & 0x000000ff) ^
+               (Td4[(t2 >> 24)       ] << 24) ^
+               (Td4[(t1 >> 16) & 0xff] << 16) ^
+               (Td4[(t0 >>  8) & 0xff] <<  8) ^
+               (Td4[(t3      ) & 0xff])       ^
                rk[2];
        PUTU32(out +  8, s2);
        s3 =
-               (Td4[(t3 >> 24)       ] & 0xff000000) ^
-               (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
-               (Td4[(t1 >>  8) & 0xff] & 0x0000ff00) ^
-               (Td4[(t0      ) & 0xff] & 0x000000ff) ^
+               (Td4[(t3 >> 24)       ] << 24) ^
+               (Td4[(t2 >> 16) & 0xff] << 16) ^
+               (Td4[(t1 >>  8) & 0xff] <<  8) ^
+               (Td4[(t0      ) & 0xff])       ^
                rk[3];
        PUTU32(out + 12, s3);
 }
 
+#else /* AES_ASM */
+
+static const u8 Te4[256] = {
+    0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
+    0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
+    0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
+    0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
+    0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
+    0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
+    0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
+    0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
+    0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
+    0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
+    0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
+    0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
+    0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
+    0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
+    0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
+    0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
+    0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
+    0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
+    0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
+    0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
+    0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
+    0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
+    0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
+    0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
+    0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
+    0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
+    0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
+    0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
+    0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
+    0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
+    0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
+    0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
+};
+static const u32 rcon[] = {
+       0x01000000, 0x02000000, 0x04000000, 0x08000000,
+       0x10000000, 0x20000000, 0x40000000, 0x80000000,
+       0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+};
+
+/**
+ * Expand the cipher key into the encryption key schedule.
+ */
+int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+                       AES_KEY *key) {
+       u32 *rk;
+       int i = 0;
+       u32 temp;
+
+       if (!userKey || !key)
+               return -1;
+       if (bits != 128 && bits != 192 && bits != 256)
+               return -2;
+
+       rk = key->rd_key;
+
+       if (bits==128)
+               key->rounds = 10;
+       else if (bits==192)
+               key->rounds = 12;
+       else
+               key->rounds = 14;
+
+       rk[0] = GETU32(userKey     );
+       rk[1] = GETU32(userKey +  4);
+       rk[2] = GETU32(userKey +  8);
+       rk[3] = GETU32(userKey + 12);
+       if (bits == 128) {
+               while (1) {
+                       temp  = rk[3];
+                       rk[4] = rk[0] ^
+                               (Te4[(temp >> 16) & 0xff] << 24) ^
+                               (Te4[(temp >>  8) & 0xff] << 16) ^
+                               (Te4[(temp      ) & 0xff] << 8) ^
+                               (Te4[(temp >> 24)       ]) ^
+                               rcon[i];
+                       rk[5] = rk[1] ^ rk[4];
+                       rk[6] = rk[2] ^ rk[5];
+                       rk[7] = rk[3] ^ rk[6];
+                       if (++i == 10) {
+                               return 0;
+                       }
+                       rk += 4;
+               }
+       }
+       rk[4] = GETU32(userKey + 16);
+       rk[5] = GETU32(userKey + 20);
+       if (bits == 192) {
+               while (1) {
+                       temp = rk[ 5];
+                       rk[ 6] = rk[ 0] ^
+                               (Te4[(temp >> 16) & 0xff] << 24) ^
+                               (Te4[(temp >>  8) & 0xff] << 16) ^
+                               (Te4[(temp      ) & 0xff] << 8) ^
+                               (Te4[(temp >> 24)       ]) ^
+                               rcon[i];
+                       rk[ 7] = rk[ 1] ^ rk[ 6];
+                       rk[ 8] = rk[ 2] ^ rk[ 7];
+                       rk[ 9] = rk[ 3] ^ rk[ 8];
+                       if (++i == 8) {
+                               return 0;
+                       }
+                       rk[10] = rk[ 4] ^ rk[ 9];
+                       rk[11] = rk[ 5] ^ rk[10];
+                       rk += 6;
+               }
+       }
+       rk[6] = GETU32(userKey + 24);
+       rk[7] = GETU32(userKey + 28);
+       if (bits == 256) {
+               while (1) {
+                       temp = rk[ 7];
+                       rk[ 8] = rk[ 0] ^
+                               (Te4[(temp >> 16) & 0xff] << 24) ^
+                               (Te4[(temp >>  8) & 0xff] << 16) ^
+                               (Te4[(temp      ) & 0xff] << 8) ^
+                               (Te4[(temp >> 24)       ]) ^
+                               rcon[i];
+                       rk[ 9] = rk[ 1] ^ rk[ 8];
+                       rk[10] = rk[ 2] ^ rk[ 9];
+                       rk[11] = rk[ 3] ^ rk[10];
+                       if (++i == 7) {
+                               return 0;
+                       }
+                       temp = rk[11];
+                       rk[12] = rk[ 4] ^
+                               (Te4[(temp >> 24)       ] << 24) ^
+                               (Te4[(temp >> 16) & 0xff] << 16) ^
+                               (Te4[(temp >>  8) & 0xff] << 8) ^
+                               (Te4[(temp      ) & 0xff]);
+                       rk[13] = rk[ 5] ^ rk[12];
+                       rk[14] = rk[ 6] ^ rk[13];
+                       rk[15] = rk[ 7] ^ rk[14];
+
+                       rk += 8;
+               }
+       }
+       return 0;
+}
+
+/**
+ * Expand the cipher key into the decryption key schedule.
+ */
+int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+                        AES_KEY *key) {
+
+        u32 *rk;
+       int i, j, status;
+       u32 temp;
+
+       /* first, start with an encryption schedule */
+       status = AES_set_encrypt_key(userKey, bits, key);
+       if (status < 0)
+               return status;
+
+       rk = key->rd_key;
+
+       /* invert the order of the round keys: */
+       for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
+               temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
+               temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
+               temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
+               temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
+       }
+       /* apply the inverse MixColumn transform to all round keys but the first and the last: */
+       for (i = 1; i < (key->rounds); i++) {
+               rk += 4;
+               for (j = 0; j < 4; j++) {
+                       u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
+
+                       tp1 = rk[j];
+                       m = tp1 & 0x80808080;
+                       tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
+                               ((m - (m >> 7)) & 0x1b1b1b1b);
+                       m = tp2 & 0x80808080;
+                       tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
+                               ((m - (m >> 7)) & 0x1b1b1b1b);
+                       m = tp4 & 0x80808080;
+                       tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
+                               ((m - (m >> 7)) & 0x1b1b1b1b);
+                       tp9 = tp8 ^ tp1;
+                       tpb = tp9 ^ tp2;
+                       tpd = tp9 ^ tp4;
+                       tpe = tp8 ^ tp4 ^ tp2;
+#if defined(ROTATE)
+                       rk[j] = tpe ^ ROTATE(tpd,16) ^
+                               ROTATE(tp9,24) ^ ROTATE(tpb,8);
+#else
+                       rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ 
+                               (tp9 >> 8) ^ (tp9 << 24) ^
+                               (tpb >> 24) ^ (tpb << 8);
+#endif
+               }
+       }
+       return 0;
+}
+
 #endif /* AES_ASM */