1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
50 #include "modes_lcl.h"
/* 128-bit quantity held as two 64-bit halves; used for the hash key H
 * and for the pre-computed GHASH multiplication tables. */
typedef struct { u64 hi,lo; } u128;
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
#define GETU32(p) BSWAP4(*(const u32 *)(p))
#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
/* Pack a 16-bit constant into the top 16 bits of a size_t, so the same
 * rem_4bit/rem_8bit reduction tables work for 32- and 64-bit builds. */
#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
/* Divide the 128-bit value V by x (one right shift) and reduce modulo
 * the GCM polynomial; 0xe1000000... is the bit-reflected form of
 * x^128+x^7+x^2+x+1.
 * NOTE(review): the "} \", "else { \" and "} while(0)" continuation
 * lines of this macro are not visible in this excerpt -- confirm the
 * macro body against the upstream modes/gcm128.c. */
#define REDUCE1BIT(V) do { \
	if (sizeof(size_t)==8) { \
		u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
		V.lo = (V.hi<<63)|(V.lo>>1); \
		V.hi = (V.hi>>1 )^T; \
		u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
		V.lo = (V.hi<<63)|(V.lo>>1); \
		V.hi = (V.hi>>1 )^((u64)T<<32); \
88 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
89 * never be set to 8. 8 is effectively reserved for testing purposes.
90 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
91 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
92 * whole spectrum of possible table driven implementations. Why? In
93 * non-"Shoup's" case memory access pattern is segmented in such manner,
94 * that it's trivial to see that cache timing information can reveal
95 * fair portion of intermediate hash value. Given that ciphertext is
96 * always available to attacker, it's possible for him to attempt to
97 * deduce secret parameter H and if successful, tamper with messages
98 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
99 * not as trivial, but there is no reason to believe that it's resistant
100 * to cache-timing attack. And the thing about "8-bit" implementation is
101 * that it consumes 16 (sixteen) times more memory, 4KB per individual
102 * key + 1KB shared. Well, on pros side it should be twice as fast as
103 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
104 * was observed to run ~75% faster, closer to 100% for commercial
105 * compilers... Yet "4-bit" procedure is preferred, because it's
106 * believed to provide better security-performance balance and adequate
107 * all-round performance. "All-round" refers to things like:
109 * - shorter setup time effectively improves overall timing for
110 * handling short messages;
111 * - larger table allocation can become unbearable because of VM
112 * subsystem penalties (for example on Windows large enough free
113 * results in VM working set trimming, meaning that consequent
114 * malloc would immediately incur working set expansion);
115 * - larger table has larger cache footprint, which can affect
116 * performance of other code paths (not necessarily even from same
117 * thread in Hyper-Threading world);
/*
 * Build the 256-entry 8-bit ("Shoup's") multiplication table from the
 * hash key H: entries at power-of-two indices come from repeated
 * REDUCE1BIT halving, the rest are filled by XOR combination.
 * NOTE(review): several lines of this function (declarations of V/i/j,
 * loop bodies, closing braces) are missing from this excerpt --
 * compare with upstream gcm128.c before relying on the code as shown.
 */
static void gcm_init_8bit(u128 Htable[256], u64 H[2])
	/* Htable[128] = H; halving fills the other power-of-two slots */
	for (Htable[128]=V, i=64; i>0; i>>=1) {
	/* fill remaining entries: Htable[i+j] = Htable[i] ^ Htable[j] */
	for (i=2; i<256; i<<=1) {
		u128 *Hi = Htable+i, H0 = *Hi;
		for (j=1; j<i; ++j) {
			Hi[j].hi = H0.hi^Htable[j].hi;
			Hi[j].lo = H0.lo^Htable[j].lo;
/*
 * GHASH multiplication, 8-bit table flavour: Xi <- Xi * H, consuming
 * one byte of Xi per iteration via the 256-entry Htable.
 * NOTE(review): this excerpt is missing several lines (the opening
 * brace, Z/n/rem declarations, the main loop header and closing
 * braces) -- compare with upstream gcm128.c; the comments below
 * describe only the visible statements.
 */
static void gcm_gmult_8bit(u64 Xi[2], u128 Htable[256])
	const u8 *xi = (const u8 *)Xi+15;
	/* runtime endianness probe: .little is nonzero on little-endian */
	const union { long one; char little; } is_endian = {1};
	/* rem_8bit[r]: reduction constant for an 8-bit chunk shifted out
	 * of Z, pre-positioned in the top bits of a size_t via PACK */
	static const size_t rem_8bit[256] = {
		PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
		PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
		PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
		PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
		PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
		PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
		PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
		PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
		PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
		PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
		PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
		PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
		PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
		PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
		PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
		PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
		PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
		PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
		PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
		PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
		PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
		PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
		PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
		PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
		PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
		PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
		PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
		PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
		PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
		PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
		PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
		PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
		PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
		PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
		PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
		PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
		PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
		PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
		PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
		PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
		PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
		PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
		PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
		PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
		PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
		PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
		PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
		PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
		PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
		PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
		PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
		PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
		PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
		PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
		PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
		PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
		PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
		PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
		PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
		PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
		PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
		PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
		PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
		PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };

	/* accumulate the table entry selected by the current byte n */
	Z.hi ^= Htable[n].hi;
	Z.lo ^= Htable[n].lo;
	if ((u8 *)Xi==xi) break;
	/* shift Z right by one byte and fold the shifted-out bits back
	 * in via the pre-computed reduction table */
	rem = (size_t)Z.lo&0xff;
	Z.lo = (Z.hi<<56)|(Z.lo>>8);
	if (sizeof(size_t)==8)
		Z.hi ^= rem_8bit[rem];
	/* 32-bit size_t: table value sits in the low half, shift it up */
	Z.hi ^= (u64)rem_8bit[rem]<<32;
	/* write Z back to Xi in big-endian (wire) byte order */
	if (is_endian.little) {
		Xi[0] = BSWAP8(Z.hi);
		Xi[1] = BSWAP8(Z.lo);
	/* no BSWAP8: store word-by-word through PUTU32 */
	v = (u32)(Z.hi>>32); PUTU32(p,v);
	v = (u32)(Z.hi); PUTU32(p+4,v);
	v = (u32)(Z.lo>>32); PUTU32(p+8,v);
	v = (u32)(Z.lo); PUTU32(p+12,v);
/* Single-block GHASH multiply, 8-bit-table flavour: Xi <- Xi * H. */
#define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
/*
 * Build the 16-entry 4-bit ("Shoup's") multiplication table from H.
 * The small-footprint path uses the loop form; the default path
 * unrolls the XOR combinations explicitly.
 * NOTE(review): opening/closing braces, the V/i/j declarations, the
 * #else lines and parts of the ARM byte-order fixup are missing from
 * this excerpt -- compare with upstream gcm128.c.
 */
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
#if defined(OPENSSL_SMALL_FOOTPRINT)
#if defined(OPENSSL_SMALL_FOOTPRINT)
	/* Htable[8] = H; halving fills the other power-of-two slots */
	for (Htable[8]=V, i=4; i>0; i>>=1) {
	/* fill remaining entries: Htable[i+j] = Htable[i] ^ Htable[j] */
	for (i=2; i<16; i<<=1) {
		for (V=*Hi, j=1; j<i; ++j) {
			Hi[j].hi = V.hi^Htable[j].hi;
			Hi[j].lo = V.lo^Htable[j].lo;
	/* unrolled variant of the same fill */
	Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
	Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
	Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
	Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
	Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
	Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
	Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
	Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
	Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
	Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
	Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
	 * ARM assembler expects specific dword order in Htable.
	const union { long one; char little; } is_endian = {1};
	if (is_endian.little)
	/* swap the 32-bit halves of each dword for the ARM asm layout */
	Htable[j].hi = V.lo<<32|V.lo>>32;
	Htable[j].lo = V.hi<<32|V.hi>>32;
/* rem_4bit[r]: reduction constant for a 4-bit chunk shifted out of Z,
 * pre-positioned in the top 16 bits of a size_t via PACK. */
static const size_t rem_4bit[16] = {
	PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
	PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
	PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
	PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
/*
 * GHASH multiplication, 4-bit table flavour: Xi <- Xi * H, consuming
 * Xi one byte at a time as two nibbles (nlo/nhi) against the
 * 16-entry Htable.
 * NOTE(review): the opening brace, Z/cnt declarations, loop headers
 * and closing braces are missing from this excerpt -- compare with
 * upstream gcm128.c; comments describe only the visible statements.
 */
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
	size_t rem, nlo, nhi;
	/* runtime endianness probe: .little is nonzero on little-endian */
	const union { long one; char little; } is_endian = {1};
	nlo = ((const u8 *)Xi)[15];
	/* start accumulation with the entry for the low nibble */
	Z.hi = Htable[nlo].hi;
	Z.lo = Htable[nlo].lo;
	/* shift Z right 4 bits; fold the shifted-out nibble back in */
	rem = (size_t)Z.lo&0xf;
	Z.lo = (Z.hi<<60)|(Z.lo>>4);
	if (sizeof(size_t)==8)
		Z.hi ^= rem_4bit[rem];
	/* 32-bit size_t: table value sits in the low half, shift it up */
	Z.hi ^= (u64)rem_4bit[rem]<<32;
	Z.hi ^= Htable[nhi].hi;
	Z.lo ^= Htable[nhi].lo;
	/* next byte of Xi */
	nlo = ((const u8 *)Xi)[cnt];
	rem = (size_t)Z.lo&0xf;
	Z.lo = (Z.hi<<60)|(Z.lo>>4);
	if (sizeof(size_t)==8)
		Z.hi ^= rem_4bit[rem];
	Z.hi ^= (u64)rem_4bit[rem]<<32;
	Z.hi ^= Htable[nlo].hi;
	Z.lo ^= Htable[nlo].lo;
	/* write Z back to Xi in big-endian (wire) byte order */
	if (is_endian.little) {
		Xi[0] = BSWAP8(Z.hi);
		Xi[1] = BSWAP8(Z.lo);
	v = (u32)(Z.hi>>32); PUTU32(p,v);
	v = (u32)(Z.hi); PUTU32(p+4,v);
	v = (u32)(Z.lo>>32); PUTU32(p+8,v);
	v = (u32)(Z.lo); PUTU32(p+12,v);
#if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as reference and a placeholder for possible future
 * non-trivial optimization[s]...
 */
/*
 * Multi-block GHASH: Xi <- (Xi ^ inp[0..15]) * H, repeated over len
 * bytes (len assumed to be a multiple of 16, per the trailing while).
 * Same nibble-at-a-time algorithm as gcm_gmult_4bit.
 * NOTE(review): opening brace, Z/cnt declarations, the inp-XOR step
 * and loop headers are missing from this excerpt -- compare with
 * upstream gcm128.c.
 */
static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)
	size_t rem, nlo, nhi;
	/* runtime endianness probe: .little is nonzero on little-endian */
	const union { long one; char little; } is_endian = {1};
	nlo = ((const u8 *)Xi)[15];
	Z.hi = Htable[nlo].hi;
	Z.lo = Htable[nlo].lo;
	/* shift Z right 4 bits; fold the shifted-out nibble back in */
	rem = (size_t)Z.lo&0xf;
	Z.lo = (Z.hi<<60)|(Z.lo>>4);
	if (sizeof(size_t)==8)
		Z.hi ^= rem_4bit[rem];
	Z.hi ^= (u64)rem_4bit[rem]<<32;
	Z.hi ^= Htable[nhi].hi;
	Z.lo ^= Htable[nhi].lo;
	nlo = ((const u8 *)Xi)[cnt];
	rem = (size_t)Z.lo&0xf;
	Z.lo = (Z.hi<<60)|(Z.lo>>4);
	if (sizeof(size_t)==8)
		Z.hi ^= rem_4bit[rem];
	Z.hi ^= (u64)rem_4bit[rem]<<32;
	Z.hi ^= Htable[nlo].hi;
	Z.lo ^= Htable[nlo].lo;
	/* write Z back to Xi in big-endian (wire) byte order */
	if (is_endian.little) {
		Xi[0] = BSWAP8(Z.hi);
		Xi[1] = BSWAP8(Z.lo);
	v = (u32)(Z.hi>>32); PUTU32(p,v);
	v = (u32)(Z.hi); PUTU32(p+4,v);
	v = (u32)(Z.lo>>32); PUTU32(p+8,v);
	v = (u32)(Z.lo); PUTU32(p+12,v);
	/* advance to the next 16-byte block */
	} while (inp+=16, len-=16);
/* assembler implementations of the 4-bit routines (GHASH_ASM builds) */
void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);

/* single-block and streamed GHASH entry points used by the API below */
#define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/* GHASH_CHUNK is a "stride parameter" whose mission is to mitigate
 * cache thrashing. In other words the idea is to hash data while it's
 * still in L1 cache after the encryption pass... */
#define GHASH_CHUNK 1024
#else /* TABLE_BITS */

/*
 * Table-less bit-by-bit GHASH multiply: Xi <- Xi * H, one bit of Xi
 * per REDUCE1BIT step. Slowest flavour, smallest footprint.
 * NOTE(review): opening brace, Z/V/X/i declarations, parts of the
 * big-endian input path and the inner accumulate step are missing
 * from this excerpt -- compare with upstream gcm128.c.
 */
static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
	const long *xi = (const long *)Xi;
	/* runtime endianness probe: .little is nonzero on little-endian */
	const union { long one; char little; } is_endian = {1};
	V.hi = H[0]; /* H is in host byte order, no byte swapping */
	/* walk Xi one machine word at a time */
	for (j=0; j<16/sizeof(long); ++j) {
		if (is_endian.little) {
			if (sizeof(long)==8) {
				X = (long)(BSWAP8(xi[j]));
			/* no 64-bit byte swap: load as two 32-bit words */
			const u8 *p = (const u8 *)(xi+j);
			X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
		const u8 *p = (const u8 *)(xi+j);
	/* for each bit of X (MSB first): conditionally add V, halve V */
	for (i=0; i<8*sizeof(long); ++i, X<<=1) {
		/* M = all-ones mask when the current (top) bit is set */
		u64 M = (u64)(X>>(8*sizeof(long)-1));
	/* write Z back to Xi in big-endian (wire) byte order */
	if (is_endian.little) {
		Xi[0] = BSWAP8(Z.hi);
		Xi[1] = BSWAP8(Z.lo);
	v = (u32)(Z.hi>>32); PUTU32(p,v);
	v = (u32)(Z.hi); PUTU32(p+4,v);
	v = (u32)(Z.lo>>32); PUTU32(p+8,v);
	v = (u32)(Z.lo); PUTU32(p+12,v);

/* Single-block GHASH multiply, bit-by-bit flavour. */
#define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/*
 * Per-key GCM state.
 * NOTE(review): this excerpt shows only part of the struct -- the
 * remaining union members (Xi, H, len, ...), the Htable array, the
 * block/key fields and the closing brace are elided. Compare with
 * upstream gcm128.c before relying on the layout.
 */
struct gcm128_context {
	/* Following 6 names follow names in GCM specification */
	union { u64 u[2]; u32 d[4]; u8 c[16]; } Yi,EKi,EK0,
	/* Pre-computed table used by gcm_gmult_* */
	/* function pointers selected at init time (asm vs. C paths) */
	void (*gmult)(u64 Xi[2],const u128 Htable[16]);
	void (*ghash)(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
	/* res: number of buffered partial-block bytes */
	unsigned int res, pad;
/* x86/x86_64 assembler glue: CLMUL (PCLMULQDQ) and MMX/plain-x86
 * 4-bit GHASH implementations, selected at runtime in
 * CRYPTO_gcm128_init via OPENSSL_ia32cap_P. */
#if TABLE_BITS==4 && defined(GHASH_ASM) && !defined(I386_ONLY) && \
	(defined(__i386) || defined(__i386__) || \
	 defined(__x86_64) || defined(__x86_64__) || \
	 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
# define GHASH_ASM_IAX
extern unsigned int OPENSSL_ia32cap_P[2];

void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);

# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
# define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);

/* dispatch through the function pointers chosen at init time */
# define GCM_MUL(ctx,Xi) (*((ctx)->gmult))(ctx->Xi.u,ctx->Htable)
# define GHASH(ctx,in,len) (*((ctx)->ghash))((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * Initialize a GCM context: derive the hash key H = E_K(0^128),
 * convert it to host byte order, build the GHASH table for the
 * configured TABLE_BITS, and (on x86) pick the fastest available
 * gmult/ghash implementation from CPU capability bits.
 * NOTE(review): braces, the p/hi/lo declarations, #if/#else/#endif
 * lines and ctx->block/key assignments are elided in this excerpt --
 * compare with upstream gcm128.c.
 */
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
	const union { long one; char little; } is_endian = {1};
	memset(ctx,0,sizeof(*ctx));
	/* H = E_K(0^128): encrypt the zeroed H block in place */
	(*block)(ctx->H.c,ctx->H.c,key);
	if (is_endian.little) {
		/* H is stored in host byte order */
		ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
		ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
	/* no BSWAP8 available: assemble 64-bit words from 32-bit loads */
	hi = (u64)GETU32(p) <<32|GETU32(p+4);
	lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
	/* TABLE_BITS==8 path */
	gcm_init_8bit(ctx->Htable,ctx->H.u);
# if defined(GHASH_ASM_IAX)
	/* presumably the PCLMULQDQ capability bit -- verify against
	 * OPENSSL_ia32cap documentation */
	if (OPENSSL_ia32cap_P[1]&(1<<1)) {
		gcm_init_clmul(ctx->Htable,ctx->H.u);
		ctx->gmult = gcm_gmult_clmul;
		ctx->ghash = gcm_ghash_clmul;
	gcm_init_4bit(ctx->Htable,ctx->H.u);
# if defined(GHASH_ASM_X86)
	/* MMX capability bit on i386 */
	if (OPENSSL_ia32cap_P[0]&(1<<23)) {
		ctx->gmult = gcm_gmult_4bit_mmx;
		ctx->ghash = gcm_ghash_4bit_mmx;
	ctx->gmult = gcm_gmult_4bit_x86;
	ctx->ghash = gcm_ghash_4bit_x86;
	/* generic (asm or C) 4-bit fallback */
	ctx->gmult = gcm_gmult_4bit;
	ctx->ghash = gcm_ghash_4bit;
	gcm_init_4bit(ctx->Htable,ctx->H.u);
/*
 * Set the IV and derive the initial counter block Yi and E_K(Y0):
 * a 96-bit IV is used directly with counter = 1, any other length is
 * GHASHed together with its bit length per the GCM spec.
 * NOTE(review): braces, the len0/ctr/i declarations, the GCM_MUL
 * calls and the counter=1 assignment are elided in this excerpt --
 * compare with upstream gcm128.c.
 */
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
	const union { long one; char little; } is_endian = {1};
	/* 96-bit IV: copy directly into Yi (counter portion elided) */
	memcpy(ctx->Yi.c,iv,12);
	/* non-96-bit IV: GHASH full 16-byte chunks of the IV ... */
	for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
	/* ... then the remaining tail bytes */
	for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
	/* fold in the IV bit length (len0) */
	if (is_endian.little) {
		ctx->Yi.u[1] ^= BSWAP8(len0);
		/* byte-by-byte fallback when BSWAP8 is unavailable */
		ctx->Yi.c[8] ^= (u8)(len0>>56);
		ctx->Yi.c[9] ^= (u8)(len0>>48);
		ctx->Yi.c[10] ^= (u8)(len0>>40);
		ctx->Yi.c[11] ^= (u8)(len0>>32);
		ctx->Yi.c[12] ^= (u8)(len0>>24);
		ctx->Yi.c[13] ^= (u8)(len0>>16);
		ctx->Yi.c[14] ^= (u8)(len0>>8);
		ctx->Yi.c[15] ^= (u8)(len0);
	/* big-endian host: length is already in wire order */
	ctx->Yi.u[1] ^= len0;
	/* read the 32-bit counter out of Yi */
	if (is_endian.little)
		ctr = GETU32(ctx->Yi.c+12);
	/* EK0 = E_K(Y0), needed for the final tag in gcm128_finish */
	(*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
	/* store the incremented counter back (increment line elided) */
	if (is_endian.little)
		PUTU32(ctx->Yi.c+12,ctr);
/*
 * Feed additional authenticated data (AAD) into the GHASH state:
 * whole 16-byte blocks first, then a partial tail, tracking the AAD
 * byte count in len.u[0].
 * NOTE(review): braces, the i declaration, the GHASH/GCM_MUL calls
 * and the aad-pointer advance are elided in this excerpt.
 */
void CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
	/* running AAD length, needed for the final length block */
	ctx->len.u[0] += len;
	/* i = len rounded down to a multiple of 16 */
	if ((i = (len&(size_t)-16))) {
	for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
	/* partial final block */
	for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
/*
 * CTR-mode encrypt len bytes from in to out while folding the
 * ciphertext into the GHASH state Xi. Several code paths: buffered
 * partial-block bytes, GHASH_CHUNK-strided bulk path, word-at-a-time
 * aligned path, byte-at-a-time fallback.
 * NOTE(review): braces, the n/i/ctr declarations, counter increments,
 * GCM_MUL/GHASH calls and loop headers are elided in this excerpt --
 * compare with upstream gcm128.c.
 */
void CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
	const union { long one; char little; } is_endian = {1};
	/* running ciphertext length for the final length block */
	ctx->len.u[1] += len;
	/* load the 32-bit counter from Yi */
	if (is_endian.little)
		ctr = GETU32(ctx->Yi.c+12);
#if !defined(OPENSSL_SMALL_FOOTPRINT)
	if (16%sizeof(size_t) == 0) do {	/* always true actually */
		/* drain previously buffered keystream bytes */
		ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
		/* block boundary reached: multiply Xi by H */
		if (n==0) GCM_MUL(ctx,Xi);
#if defined(STRICT_ALIGNMENT)
		/* fall through to byte path on misaligned buffers */
		if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
#if defined(GHASH)
 && defined(GHASH_CHUNK)
		/* bulk path: encrypt GHASH_CHUNK bytes, then hash them
		 * while still hot in L1 (see GHASH_CHUNK comment) */
		while (len>=GHASH_CHUNK) {
			size_t j=GHASH_CHUNK;
			(*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			/* XOR keystream one machine word at a time */
			for (i=0; i<16; i+=sizeof(size_t))
				*(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
			GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
		/* remaining whole blocks (i = len rounded down to 16) */
		if ((i = (len&(size_t)-16))) {
			(*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			for (i=0; i<16; i+=sizeof(size_t))
				*(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
		/* no GHASH(): hash each block inline via GCM_MUL */
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		for (i=0; i<16; i+=sizeof(size_t))
			*(size_t *)(ctx->Xi.c+i) ^=
			*(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
		/* final partial block: buffer keystream in EKi, res=n */
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
	/* small-footprint byte-at-a-time path */
	for (i=0;i<len;++i) {
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
/*
 * CTR-mode decrypt len bytes from in to out. Mirrors
 * CRYPTO_gcm128_encrypt, except the CIPHERTEXT (input) is folded into
 * the GHASH state Xi -- and in the bulk path it is hashed BEFORE
 * decryption, while it is still untouched input.
 * NOTE(review): braces, the n/i/ctr/c declarations, counter
 * increments, GCM_MUL/GHASH calls and loop headers are elided in this
 * excerpt -- compare with upstream gcm128.c.
 */
void CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
	const union { long one; char little; } is_endian = {1};
	/* running ciphertext length for the final length block */
	ctx->len.u[1] += len;
	/* load the 32-bit counter from Yi */
	if (is_endian.little)
		ctr = GETU32(ctx->Yi.c+12);
#if !defined(OPENSSL_SMALL_FOOTPRINT)
	if (16%sizeof(size_t) == 0) do {	/* always true actually */
		/* drain buffered keystream; c holds the ciphertext byte */
		*(out++) = c^ctx->EKi.c[n];
		if (n==0) GCM_MUL (ctx,Xi);
#if defined(STRICT_ALIGNMENT)
		if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
#if defined(GHASH)
 && defined(GHASH_CHUNK)
		/* bulk path: hash the ciphertext first, then decrypt */
		while (len>=GHASH_CHUNK) {
			size_t j=GHASH_CHUNK;
			GHASH(ctx,in,GHASH_CHUNK);
			(*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			/* XOR keystream one machine word at a time */
			for (i=0; i<16; i+=sizeof(size_t))
				*(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
		/* remaining whole blocks (i = len rounded down to 16) */
		if ((i = (len&(size_t)-16))) {
			(*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			for (i=0; i<16; i+=sizeof(size_t))
				*(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
		/* no GHASH(): save ciphertext word, decrypt, hash inline */
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		for (i=0; i<16; i+=sizeof(size_t)) {
			size_t c = *(size_t *)(in+i);
			*(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
			*(size_t *)(ctx->Xi.c+i) ^= c;
		/* final partial block */
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		out[n] = c^ctx->EKi.c[n];
	/* small-footprint byte-at-a-time path */
	for (i=0;i<len;++i) {
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		out[i] ^= ctx->EKi.c[n];
/*
 * Finalize the tag: fold the 128-bit length block (AAD bits || data
 * bits) into Xi, multiply by H, and XOR with EK0 = E_K(Y0). After
 * this, ctx->Xi.c holds the authentication tag.
 * NOTE(review): braces, the p declaration, a trailing GCM_MUL and a
 * flush of any buffered partial block are elided in this excerpt --
 * compare with upstream gcm128.c.
 */
void CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx)
	const union { long one; char little; } is_endian = {1};
	/* lengths in BITS, per the GCM specification */
	u64 alen = ctx->len.u[0]<<3;
	u64 clen = ctx->len.u[1]<<3;
	if (is_endian.little) {
		alen = BSWAP8(alen);
		clen = BSWAP8(clen);
		/* no BSWAP8: byte-order conversion path (partly elided) */
		ctx->len.u[0] = alen;
		ctx->len.u[1] = clen;
		alen = (u64)GETU32(p) <<32|GETU32(p+4);
		clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
	/* fold the length block into the hash */
	ctx->Xi.u[0] ^= alen;
	ctx->Xi.u[1] ^= clen;
	/* tag = GHASH result XOR E_K(Y0) */
	ctx->Xi.u[0] ^= ctx->EK0.u[0];
	ctx->Xi.u[1] ^= ctx->EK0.u[1];
#if defined(SELFTEST)
#include <openssl/aes.h>

/*
 * Known-answer test vectors: K=key, IV, P=plaintext, A=AAD,
 * C=ciphertext, T=tag. Bare sizes like K1[16] declare all-zero
 * arrays. Presumably these are the standard AES-GCM validation
 * vectors from the GCM submission -- verify against that document.
 * NOTE(review): some declarations (e.g. IV1, A2, K4/K5, "Test Case"
 * banner comments) are elided in this excerpt.
 */
/* Test Case 1: zero key/IV, empty plaintext */
static const u8	K1[16],
	T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};

/* Test Case 2: zero key/IV, one zero block */
static const u8	P2[16],
	C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
	T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};

/* Test Case 3: 128-bit key, 4-block plaintext, no AAD */
static const u8	K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
	P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
	      0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
	      0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
	      0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
	IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
	C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
	      0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
	      0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
	      0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
	T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};

/* Test Case 4: as case 3 but truncated plaintext plus AAD */
static const u8	P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
	      0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
	      0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
	      0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
	A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
	      0xab,0xad,0xda,0xd2},
	C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
	      0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
	      0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
	      0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
	T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};

/* Test Case 5: short (64-bit) IV */
static const u8	A5[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
	      0xab,0xad,0xda,0xd2},
	IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
	C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
	      0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
	      0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
	      0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
	T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};

/* Test Case 6: long (480-bit) IV */
static const u8	IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
	      0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
	      0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
	      0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
	C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
	      0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
	      0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
	      0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
	T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};

/* Test Cases 7-12: 192-bit keys (some declarations elided) */
static const u8	K7[24],
	T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};

static const u8	P8[16],
	C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
	T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};

static const u8	K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
	      0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
	P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
	      0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
	      0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
	      0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
	IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
	C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
	      0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
	      0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
	      0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
	T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};

static const u8	P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
	      0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
	      0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
	      0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
	A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
	      0xab,0xad,0xda,0xd2},
	C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
	      0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
	      0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
	      0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
	T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};

static const u8	IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
	C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
	      0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
	      0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
	      0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
	T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};

static const u8	IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
	      0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
	      0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
	      0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
	C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
	      0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
	      0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
	      0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
	T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};

/* Test Cases 13-18: 256-bit keys (some declarations elided) */
static const u8	K13[32],
	T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};

static const u8	P14[16],
	C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
	T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};

static const u8	K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
	      0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
	P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
	      0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
	      0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
	      0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
	IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
	C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
	      0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
	      0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
	      0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
	T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};

static const u8	P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
	      0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
	      0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
	      0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
	A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
	      0xab,0xad,0xda,0xd2},
	C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
	      0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
	      0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
	      0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
	T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};

static const u8	IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
	C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
	      0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
	      0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
	      0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
	T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};

static const u8	IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
	      0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
	      0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
	      0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
	C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
	      0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
	      0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
	      0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
	T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
/* Run one known-answer test: encrypt P##n and check tag T##n and
 * ciphertext C##n, then decrypt C##n and check tag and plaintext.
 * NOTE(review): the trailing "} while(0)" of this macro is not
 * visible in this excerpt. */
#define TEST_CASE(n) do { \
	u8 out[sizeof(P##n)]; \
	AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
	CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
	if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
	CRYPTO_gcm128_finish(&ctx); \
	if (memcmp(ctx.Xi.c,T##n,16) || (C##n && memcmp(out,C##n,sizeof(out)))) \
		ret++, printf ("encrypt test#%d failed.\n",n);\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
	if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
	CRYPTO_gcm128_finish(&ctx); \
	if (memcmp(ctx.Xi.c,T##n,16) || (P##n && memcmp(out,P##n,sizeof(out)))) \
		ret++, printf ("decrypt test#%d failed.\n",n);\
#ifdef OPENSSL_CPUID_OBJ
	/*
	 * Cycle-count benchmark: GCM vs plain CTR over a 1KB buffer,
	 * plus a GHASH-only measurement. Warm-up call precedes each
	 * timed call. NOTE(review): this fragment sits inside a main()
	 * whose surrounding lines (declarations of key/ctx/ret/i,
	 * braces, #endif) are elided in this excerpt.
	 */
	size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
	union { u64 u; u8 c[1024]; } buf;

	AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
	CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
	CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));

	/* warm-up, then timed GCM pass */
	CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
	start = OPENSSL_rdtsc();
	CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
	gcm_t = OPENSSL_rdtsc() - start;

	/* warm-up, then timed plain-CTR pass for comparison */
	CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
			&key,ctx.Yi.c,ctx.EKi.c,&ctx.res,
			(block128_f)AES_encrypt);
	start = OPENSSL_rdtsc();
	CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
			&key,ctx.Yi.c,ctx.EKi.c,&ctx.res,
			(block128_f)AES_encrypt);
	ctr_t = OPENSSL_rdtsc() - start;

	/* cycles/byte: gcm, ctr, and their difference (= GHASH cost) */
	printf("%.2f-%.2f=%.2f\n",
			gcm_t/(double)sizeof(buf),
			ctr_t/(double)sizeof(buf),
			(gcm_t-ctr_t)/(double)sizeof(buf));

	/* GHASH-only throughput over 100 iterations */
	GHASH(&ctx,buf.c,sizeof(buf));
	start = OPENSSL_rdtsc();
	for (i=0;i<100;++i) GHASH(&ctx,buf.c,sizeof(buf));
	gcm_t = OPENSSL_rdtsc() - start;
	printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);