1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use is governed by OpenSSL license.
5 * ====================================================================
8 #include <openssl/modes.h>
/*
 * Short integer aliases used by the rest of this file. The 64-bit alias is
 * picked per toolchain: the first branch tests for Windows targets other
 * than MinGW and uses the `__int64` keyword, the second tests `__arch64__`
 * and uses `unsigned long`; `long long` types appear after that.
 * NOTE(review): the #else/#endif lines that separate and close these
 * branches are not visible in this excerpt -- confirm against the full file.
 */
11 #if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
13 typedef unsigned __int64 u64;
/* U64(C) pastes the UI64 suffix onto the constant C. */
14 #define U64(C) C##UI64
15 #elif defined(__arch64__)
17 typedef unsigned long u64;
20 typedef long long i64;
21 typedef unsigned long long u64;
/* u32 and u8 alias `unsigned int` and `unsigned char`. */
25 typedef unsigned int u32;
26 typedef unsigned char u8;
/*
 * STRICT_ALIGNMENT is defined by default and then removed again when any of
 * the x86, x86-64 or s390/s390x target macros tested below is present.
 * Further down, the GETU32/PUTU32 variant that dereferences a cast u32
 * pointer is only selected when STRICT_ALIGNMENT is not defined.
 */
28 #define STRICT_ALIGNMENT 1
29 #if defined(__i386) || defined(__i386__) || \
30 defined(__x86_64) || defined(__x86_64__) || \
31 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \
32 defined(__s390__) || defined(__s390x__)
33 # undef STRICT_ALIGNMENT
36 #if !defined(PEDANTIC) && !defined(OPENSSL_NO_ASM) && !defined(OPNESSL_NO_INLINE_ASM)
37 #if defined(__GNUC__) && __GNUC__>=2
38 # if defined(__x86_64) || defined(__x86_64__)
39 # define BSWAP8(x) ({ u64 ret=(x); \
40 asm volatile ("bswapq %0" \
42 # define BSWAP4(x) ({ u32 ret=(x); \
43 asm volatile ("bswapl %0" \
45 # elif (defined(__i386) || defined(__i386__))
46 # define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \
47 asm volatile ("bswapl %0; bswapl %1" \
48 : "+r"(hi),"+r"(lo)); \
50 # define BSWAP4(x) ({ u32 ret=(x); \
51 asm volatile ("bswapl %0" \
54 #elif defined(_MSC_VER)
/* MSVC branch: request the intrinsic forms of the two byte-swap routines. */
56 # pragma intrinsic(_byteswap_uint64,_byteswap_ulong)
/* BSWAP8/BSWAP4 forward to those routines after casting the argument to u64/u32. */
57 # define BSWAP8(x) _byteswap_uint64((u64)(x))
58 # define BSWAP4(x) _byteswap_ulong((u32)(x))
59 # elif defined(_M_IX86)
60 __inline u32 _bswap4(u32 val) {
64 # define BSWAP4(x) _bswap4(x)
69 #if defined(BSWAP4) && !defined(STRICT_ALIGNMENT)
/*
 * Fast path: move a 32-bit word through a cast pointer and reverse its byte
 * order with BSWAP4. This variant is only selected when BSWAP4 exists and
 * STRICT_ALIGNMENT is not defined, so p is dereferenced as a u32 without
 * any alignment fix-up.
 *
 * GETU32(p)   - yields BSWAP4 of the u32 read from p.
 * PUTU32(p,v) - stores BSWAP4(v) at p and yields the stored value. The
 *               expansion is wrapped in parentheses so that it behaves as a
 *               single operand inside a larger expression, matching the
 *               byte-wise variant of the same macro.
 */
#define GETU32(p)   BSWAP4(*(const u32 *)(p))
#define PUTU32(p,v) (*(u32 *)(p) = BSWAP4(v))
/*
 * Byte-wise path: read or write a 32-bit word as four individual bytes, with
 * p[0] as the most significant byte. No wide load/store through p is made.
 *
 * GETU32(p)   - combines p[0..3] into a u32. Each byte is narrowed to u8
 *               before it is widened, so a pointer to plain or signed char
 *               cannot sign-extend into the higher bytes of the result.
 * PUTU32(p,v) - stores the four bytes of v at p[0..3], most significant
 *               first; the value of the expression is the last byte stored.
 *
 * Both macros evaluate their arguments more than once, so the arguments must
 * be free of side effects.
 */
#define GETU32(p) \
    ((u32)(u8)(p)[0] << 24 | (u32)(u8)(p)[1] << 16 | \
     (u32)(u8)(p)[2] <<  8 | (u32)(u8)(p)[3])
#define PUTU32(p,v) \
    ((p)[0] = (u8)((v) >> 24), (p)[1] = (u8)((v) >> 16), \
     (p)[2] = (u8)((v) >>  8), (p)[3] = (u8)(v))
79 typedef struct { u64 hi,lo; } u128;
85 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
86 * never be set to 8. 8 is effectively reserved for testing purposes.
87 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
88 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
89 * whole spectrum of possible table driven implementations. Why? In
90 * non-"Shoup's" case memory access pattern is segmented in such manner,
91 * that it's trivial to see that cache timing information can reveal
92 * a fair portion of the intermediate hash value. Given that ciphertext is
93 * always available to an attacker, it's possible for them to attempt to
94 * deduce the secret parameter H and, if successful, tamper with messages
95 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
96 * not as trivial, but there is no reason to believe that it's resistant
97 * to cache-timing attack. And the thing about "8-bit" implementation is
98 * that it consumes 16 (sixteen) times more memory, 4KB per individual
99 * key + 1KB shared. On the plus side, it should be twice as fast as the
100 * "4-bit" version. And for gcc-generated x86[_64] code, the "8-bit" version
101 * was observed to run ~75% faster, closer to 100% for commercial
102 * compilers... Yet "4-bit" procedure is preferred, because it's
103 * believed to provide better security-performance balance and adequate
104 * all-round performance. "All-round" refers to things like:
106 * - shorter setup time effectively improves overall timing for
107 * handling short messages;
108 * - larger table allocation can become unbearable because of VM
109 * subsystem penalties (for example, on Windows a large enough free()
110 * results in VM working set trimming, meaning that a subsequent
111 * malloc() would immediately incur working set expansion);
112 * - larger table has larger cache footprint, which can affect
113 * performance of other code paths (not necessarily even from same
114 * thread in Hyper-Threading world);
118 struct gcm128_context {
119 /* Following 6 names follow names in GCM specification */
120 union { u64 u[2]; u32 d[4]; u8 c[16]; } Yi,EKi,EK0,
122 /* Pre-computed table used by gcm_gmult_* */
127 void (*gmult)(u64 Xi[2],const u128 Htable[16]);
128 void (*ghash)(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
130 unsigned int mres, ares;