Usage for -hack and -prexit -verify_return_error
[openssl.git] / crypto / modes / gcm128.c
index ae5fab1b46c7d36aff57be9dc2164e8fa368e88c..19cbcf5572cb9494062a5bed49d3e3ec0905a347 100644 (file)
@@ -645,7 +645,7 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
 
 #endif
 
-#if    TABLE_BITS==4 && defined(GHASH_ASM)
+#if    TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
 # if   !defined(I386_ONLY) && \
        (defined(__i386)        || defined(__i386__)    || \
         defined(__x86_64)      || defined(__x86_64__)  || \
@@ -658,7 +658,7 @@ void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
 
-#if defined(__i386) || defined(__i386__)
+#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
 # define gcm_init_avx  gcm_init_clmul
 # define gcm_gmult_avx gcm_gmult_clmul
 # define gcm_ghash_avx gcm_ghash_clmul
@@ -676,13 +676,21 @@ void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
 #  endif
-# elif defined(__arm__) || defined(__arm)
+# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
 #  include "arm_arch.h"
 #  if __ARM_ARCH__>=7
 #   define GHASH_ASM_ARM
 #   define GCM_FUNCREF_4BIT
+#   define PMULL_CAPABLE       (OPENSSL_armcap_P & ARMV8_PMULL)
+#   if defined(__arm__) || defined(__arm)
+#    define NEON_CAPABLE       (OPENSSL_armcap_P & ARMV7_NEON)
+#   endif
+void gcm_init_neon(u128 Htable[16],const u64 Xi[2]);
 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+void gcm_init_v8(u128 Htable[16],const u64 Xi[2]);
+void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]);
+void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
 #  endif
 # elif defined(__sparc__) || defined(__sparc)
 #  include "sparc_arch.h"
@@ -766,10 +774,21 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
        ctx->ghash = gcm_ghash_4bit;
 #  endif
 # elif defined(GHASH_ASM_ARM)
-       if (OPENSSL_armcap_P & ARMV7_NEON) {
+#  ifdef PMULL_CAPABLE
+       if (PMULL_CAPABLE) {
+               gcm_init_v8(ctx->Htable,ctx->H.u);
+               ctx->gmult = gcm_gmult_v8;
+               ctx->ghash = gcm_ghash_v8;
+       } else
+#  endif
+#  ifdef NEON_CAPABLE
+       if (NEON_CAPABLE) {
+               gcm_init_neon(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_neon;
                ctx->ghash = gcm_ghash_neon;
-       } else {
+       } else
+#  endif
+       {
                gcm_init_4bit(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_4bit;
                ctx->ghash = gcm_ghash_4bit;
@@ -847,7 +866,11 @@ void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
                GCM_MUL(ctx,Yi);
 
                if (is_endian.little)
+#ifdef BSWAP4
+                       ctr = BSWAP4(ctx->Yi.d[3]);
+#else
                        ctr = GETU32(ctx->Yi.c+12);
+#endif
                else
                        ctr = ctx->Yi.d[3];
        }
@@ -855,7 +878,11 @@ void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
        (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
        ++ctr;
        if (is_endian.little)
+#ifdef BSWAP4
+               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                PUTU32(ctx->Yi.c+12,ctr);
+#endif
        else
                ctx->Yi.d[3] = ctr;
 }
@@ -950,7 +977,11 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
        }
 
        if (is_endian.little)
+#ifdef BSWAP4
+               ctr = BSWAP4(ctx->Yi.d[3]);
+#else
                ctr = GETU32(ctx->Yi.c+12);
+#endif
        else
                ctr = ctx->Yi.d[3];
 
@@ -984,7 +1015,11 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
@@ -1006,7 +1041,11 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
@@ -1025,7 +1064,11 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
@@ -1041,7 +1084,11 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        while (len--) {
@@ -1059,7 +1106,11 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                }
@@ -1103,7 +1154,11 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
        }
 
        if (is_endian.little)
+#ifdef BSWAP4
+               ctr = BSWAP4(ctx->Yi.d[3]);
+#else
                ctr = GETU32(ctx->Yi.c+12);
+#endif
        else
                ctr = ctx->Yi.d[3];
 
@@ -1140,7 +1195,11 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
@@ -1160,7 +1219,11 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
@@ -1178,7 +1241,11 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i) {
@@ -1196,7 +1263,11 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        while (len--) {
@@ -1217,7 +1288,11 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                }
@@ -1262,7 +1337,11 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
        }
 
        if (is_endian.little)
+#ifdef BSWAP4
+               ctr = BSWAP4(ctx->Yi.d[3]);
+#else
                ctr = GETU32(ctx->Yi.c+12);
+#endif
        else
                ctr = ctx->Yi.d[3];
 
@@ -1284,7 +1363,11 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
                ctr += GHASH_CHUNK/16;
                if (is_endian.little)
+#ifdef BSWAP4
+                       ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                        PUTU32(ctx->Yi.c+12,ctr);
+#endif
                else
                        ctx->Yi.d[3] = ctr;
                GHASH(ctx,out,GHASH_CHUNK);
@@ -1299,7 +1382,11 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                (*stream)(in,out,j,key,ctx->Yi.c);
                ctr += (unsigned int)j;
                if (is_endian.little)
+#ifdef BSWAP4
+                       ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                        PUTU32(ctx->Yi.c+12,ctr);
+#endif
                else
                        ctx->Yi.d[3] = ctr;
                in  += i;
@@ -1319,7 +1406,11 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
+#ifdef BSWAP4
+                       ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                        PUTU32(ctx->Yi.c+12,ctr);
+#endif
                else
                        ctx->Yi.d[3] = ctr;
                while (len--) {
@@ -1361,7 +1452,11 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
        }
 
        if (is_endian.little)
+#ifdef BSWAP4
+               ctr = BSWAP4(ctx->Yi.d[3]);
+#else
                ctr = GETU32(ctx->Yi.c+12);
+#endif
        else
                ctr = ctx->Yi.d[3];
 
@@ -1386,7 +1481,11 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
                ctr += GHASH_CHUNK/16;
                if (is_endian.little)
+#ifdef BSWAP4
+                       ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                        PUTU32(ctx->Yi.c+12,ctr);
+#endif
                else
                        ctx->Yi.d[3] = ctr;
                out += GHASH_CHUNK;
@@ -1412,7 +1511,11 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                (*stream)(in,out,j,key,ctx->Yi.c);
                ctr += (unsigned int)j;
                if (is_endian.little)
+#ifdef BSWAP4
+                       ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                        PUTU32(ctx->Yi.c+12,ctr);
+#endif
                else
                        ctx->Yi.d[3] = ctr;
                out += i;
@@ -1423,7 +1526,11 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
+#ifdef BSWAP4
+                       ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                        PUTU32(ctx->Yi.c+12,ctr);
+#endif
                else
                        ctx->Yi.d[3] = ctr;
                while (len--) {