crypto/modes/modes_lcl.h

   1 /* ====================================================================
   2  * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
   3  *
   4  * Redistribution and use is governed by OpenSSL license.
   5  * ====================================================================
   6  */
   7
   8 #include <openssl/modes.h>
   9
  10
  11 #if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
  12 typedef __int64 i64;
  13 typedef unsigned __int64 u64;
  14 #define U64(C) C##UI64
  15 #elif defined(__arch64__)
  16 typedef long i64;
  17 typedef unsigned long u64;
  18 #define U64(C) C##UL
  19 #else
  20 typedef long long i64;
  21 typedef unsigned long long u64;
  22 #define U64(C) C##ULL
  23 #endif
  24
  25 typedef unsigned int u32;
  26 typedef unsigned char u8;
  27
  28 #define STRICT_ALIGNMENT 1
  29 #if defined(__i386)     || defined(__i386__)    || \
  30     defined(__x86_64)   || defined(__x86_64__)  || \
  31     defined(_M_IX86)    || defined(_M_AMD64)    || defined(_M_X64) || \
  32     defined(__s390__)   || defined(__s390x__)
  33 # undef STRICT_ALIGNMENT
  34 #endif
  35
  36 #if !defined(PEDANTIC) && !defined(OPENSSL_NO_ASM) && !defined(OPNESSL_NO_INLINE_ASM)
  37 #if defined(__GNUC__) && __GNUC__>=2
  38 # if defined(__x86_64) || defined(__x86_64__)
  39 #  define BSWAP8(x) ({  u64 ret=(x);                    \
  40                         asm volatile ("bswapq %0"       \
  41                         : "+r"(ret));   ret;            })
  42 #  define BSWAP4(x) ({  u32 ret=(x);                    \
  43                         asm volatile ("bswapl %0"       \
  44                         : "+r"(ret));   ret;            })
  45 # elif (defined(__i386) || defined(__i386__))
  46 #  define BSWAP8(x) ({  u32 lo=(u64)(x)>>32,hi=(x);     \
  47                         asm volatile ("bswapl %0; bswapl %1"    \
  48                         : "+r"(hi),"+r"(lo));           \
  49                         (u64)hi<<32|lo;                 })
  50 #  define BSWAP4(x) ({  u32 ret=(x);                    \
  51                         asm volatile ("bswapl %0"       \
  52                         : "+r"(ret));   ret;            })
  53 # endif
  54 #elif defined(_MSC_VER)
  55 # if _MSC_VER>=1300
  56 #  pragma intrinsic(_byteswap_uint64,_byteswap_ulong)
  57 #  define BSWAP8(x)     _byteswap_uint64((u64)(x))
  58 #  define BSWAP4(x)     _byteswap_ulong((u32)(x))
  59 # elif defined(_M_IX86)
  60    __inline u32 _bswap4(u32 val) {
  61         _asm mov eax,val
  62         _asm bswap eax
  63    }
  64 #  define BSWAP4(x)     _bswap4(x)
  65 # endif
  66 #endif
  67 #endif
  68
  69 #if defined(BSWAP4) && !defined(STRICT_ALIGNMENT)
  70 #define GETU32(p)       BSWAP4(*(const u32 *)(p))
  71 #define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
  72 #else
  73 #define GETU32(p)       ((u32)(p)[0]<<24|(u32)(p)[1]<<16|(u32)(p)[2]<<8|(u32)(p)[3])
  74 #define PUTU32(p,v)     ((p)[0]=(u8)((v)>>24),(p)[1]=(u8)((v)>>16),(p)[2]=(u8)((v)>>8),(p)[3]=(u8)(v))
  75 #endif
  76
  77 /* GCM definitions */
  78
  79 typedef struct { u64 hi,lo; } u128;
  80
  81 #ifdef  TABLE_BITS
  82 #undef  TABLE_BITS
  83 #endif
  84 /*
  85  * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
  86  * never be set to 8. 8 is effectively reserved for testing purposes.
  87  * TABLE_BITS>1 are lookup-table-driven implementations referred to as
  88  * "Shoup's" in GCM specification. In other words OpenSSL does not cover
  89  * whole spectrum of possible table driven implementations. Why? In
  90  * non-"Shoup's" case memory access pattern is segmented in such manner,
  91  * that it's trivial to see that cache timing information can reveal
  92  * fair portion of intermediate hash value. Given that ciphertext is
  93  * always available to attacker, it's possible for him to attempt to
  94  * deduce secret parameter H and if successful, tamper with messages
  95  * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
  96  * not as trivial, but there is no reason to believe that it's resistant
  97  * to cache-timing attack. And the thing about "8-bit" implementation is
  98  * that it consumes 16 (sixteen) times more memory, 4KB per individual
  99  * key + 1KB shared. Well, on pros side it should be twice as fast as
 100  * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
 101  * was observed to run ~75% faster, closer to 100% for commercial
 102  * compilers... Yet "4-bit" procedure is preferred, because it's
 103  * believed to provide better security-performance balance and adequate
 104  * all-round performance. "All-round" refers to things like:
 105  *
 106  * - shorter setup time effectively improves overall timing for
 107  *   handling short messages;
 108  * - larger table allocation can become unbearable because of VM
 109  *   subsystem penalties (for example on Windows large enough free
 110  *   results in VM working set trimming, meaning that consequent
 111  *   malloc would immediately incur working set expansion);
 112  * - larger table has larger cache footprint, which can affect
 113  *   performance of other code paths (not necessarily even from same
 114  *   thread in Hyper-Threading world);
 115  */
 116 #define TABLE_BITS 4
 117
 118 struct gcm128_context {
 119         /* Following 6 names follow names in GCM specification */
 120         union { u64 u[2]; u32 d[4]; u8 c[16]; } Yi,EKi,EK0,
 121                                                 Xi,H,len;
 122         /* Pre-computed table used by gcm_gmult_* */
 123 #if TABLE_BITS==8
 124         u128 Htable[256];
 125 #else
 126         u128 Htable[16];
 127         void (*gmult)(u64 Xi[2],const u128 Htable[16]);
 128         void (*ghash)(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
 129 #endif
 130         unsigned int mres, ares;
 131         block128_f block;
 132         void *key;
 133 };