1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
50 #include "modes_lcl.h"
/* 128-bit quantity stored as two 64-bit halves (hi = bits 127..64);
 * used for the hash subkey H and the precomputed Htable entries. */
60 typedef struct { u64 hi,lo; } u128;
/* When BSWAP4 exists and alignment is guaranteed, 32-bit loads/stores
 * plus a byte swap replace the generic byte-by-byte GETU32/PUTU32. */
62 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
63 /* redefine, because alignment is ensured */
65 #define GETU32(p) BSWAP4(*(const u32 *)(p))
67 #define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
/* Place a 16-bit value in the top 16 bits of a size_t; used to build
 * the rem_4bit/rem_8bit reduction tables so they work for both 32- and
 * 64-bit size_t. */
70 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
/* Shift the 128-bit value V right by one bit in GF(2^128), folding the
 * dropped bit back in with the GCM reduction constant 0xE1...; the
 * sizeof(size_t)==8 test selects a 64-bit or 32-bit flavour at compile
 * time (the branch is resolved by the compiler, not at run time). */
71 #define REDUCE1BIT(V) do { \
72 if (sizeof(size_t)==8) { \
73 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
74 V.lo = (V.hi<<63)|(V.lo>>1); \
75 V.hi = (V.hi>>1 )^T; \
78 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
79 V.lo = (V.hi<<63)|(V.lo>>1); \
80 V.hi = (V.hi>>1 )^((u64)T<<32); \
88 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
89 * never be set to 8. 8 is effectively reserved for testing purposes.
90 * Under ideal conditions "8-bit" version should be twice as fast as
91 * "4-bit" one. For gcc-generated x86[_64] code, "8-bit" was observed to
92 * run ~75% faster, closer to 100% for commercial compilers... But the
93 * catch is that "8-bit" procedure consumes 16 times more memory, 4KB
94 per individual key + 1KB shared, and as access to these tables end up
95 * on critical path, real-life execution time would be sensitive to
96 * cache timing. It's not actually proven, but "4-bit" procedure is
97 * believed to provide adequate all-round performance...
/* Build the 256-entry table for the "8-bit" GHASH path so that
 * Htable[i] = i*H in GF(2^128).
 * NOTE(review): this listing is elided — declarations, the body of the
 * halving loop and the closing braces are not shown here. */
103 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
/* Power-of-two entries: start from H at index 128 and halve (shift
 * right with reduction) down to index 1. */
113 for (Htable[128]=V, i=64; i>0; i>>=1) {
/* Remaining entries are XOR combinations of the power-of-two entries:
 * Htable[i+j] = Htable[i] ^ Htable[j]. */
118 for (i=2; i<256; i<<=1) {
119 u128 *Hi = Htable+i, H0 = *Hi;
120 for (j=1; j<i; ++j) {
121 Hi[j].hi = H0.hi^Htable[j].hi;
122 Hi[j].lo = H0.lo^Htable[j].lo;
/* Multiply Xi (the running GHASH accumulator, big-endian in memory) by H
 * using the 256-entry table, one input byte per iteration, starting from
 * the last byte.  NOTE(review): listing is elided — the loop header,
 * several declarations and braces are missing from this view. */
127 static void gcm_gmult_8bit(u64 Xi[2], u128 Htable[256])
130 const u8 *xi = (const u8 *)Xi+15;
/* Runtime endianness probe: is_endian.little is nonzero on a
 * little-endian host. */
132 const union { long one; char little; } is_endian = {1};
/* Reduction table: rem_8bit[b] is the polynomial correction for the
 * 8 bits shifted out of the low end, pre-shifted into the top 16 bits
 * of a size_t by PACK. */
133 static const size_t rem_8bit[256] = {
134 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
135 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
136 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
137 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
138 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
139 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
140 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
141 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
142 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
143 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
144 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
145 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
146 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
147 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
148 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
149 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
150 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
151 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
152 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
153 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
154 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
155 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
156 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
157 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
158 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
159 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
160 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
161 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
162 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
163 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
164 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
165 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
166 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
167 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
168 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
169 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
170 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
171 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
172 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
173 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
174 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
175 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
176 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
177 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
178 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
179 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
180 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
181 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
182 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
183 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
184 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
185 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
186 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
187 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
188 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
189 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
190 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
191 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
192 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
193 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
194 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
195 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
196 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
197 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
/* Per-byte step: fold in the table entry for the current byte... */
200 Z.hi ^= Htable[n].hi;
201 Z.lo ^= Htable[n].lo;
/* ...stop once we have walked back to the first byte of Xi. */
203 if ((u8 *)Xi==xi) break;
/* Shift Z right by 8 bits, then apply the reduction for the bits that
 * fell off the low end. */
207 rem = (size_t)Z.lo&0xff;
208 Z.lo = (Z.hi<<56)|(Z.lo>>8);
210 if (sizeof(size_t)==8)
211 Z.hi ^= rem_8bit[rem];
/* 32-bit size_t: table entries sit in the low half, so shift up. */
213 Z.hi ^= (u64)rem_8bit[rem]<<32;
/* Store the result back into Xi in big-endian byte order. */
216 if (is_endian.little) {
218 Xi[0] = BSWAP8(Z.hi);
219 Xi[1] = BSWAP8(Z.lo);
/* Fallback path (presumably when BSWAP8 is unavailable — the #ifdef
 * lines are elided here): emit the four 32-bit words explicitly. */
223 v = (u32)(Z.hi>>32); PUTU32(p,v);
224 v = (u32)(Z.hi); PUTU32(p+4,v);
225 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
226 v = (u32)(Z.lo); PUTU32(p+12,v);
/* TABLE_BITS==8 binding: multiply the accumulator ctx->Xi by H. */
234 #define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
/* Build the 16-entry table for the "4-bit" GHASH path: Htable[i] = i*H.
 * NOTE(review): listing is elided — declarations, loop bodies and the
 * non-SMALL_FOOTPRINT unrolled doubling steps are partially missing. */
238 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
241 #if defined(OPENSSL_SMALL_FOOTPRINT)
/* Compact variant: loop-driven halving and combination. */
250 #if defined(OPENSSL_SMALL_FOOTPRINT)
251 for (Htable[8]=V, i=4; i>0; i>>=1) {
256 for (i=2; i<16; i<<=1) {
258 for (V=*Hi, j=1; j<i; ++j) {
259 for (V=*Hi, j=1; j<i; ++j) {
260 Hi[j].hi = V.hi^Htable[j].hi;
261 Hi[j].lo = V.lo^Htable[j].lo;
/* Unrolled variant: remaining entries as explicit XOR combinations
 * of the power-of-two entries (Htable[1],[2],[4],[8]). */
272 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
274 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
275 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
276 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
278 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
279 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
280 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
281 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
282 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
283 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
284 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
286 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
288 * ARM assembler expects specific dword order in Htable.
292 const union { long one; char little; } is_endian = {1};
294 if (is_endian.little)
/* Swap the 32-bit halves of each dword to the order the ARM
 * assembly routines expect. */
303 Htable[j].hi = V.lo<<32|V.lo>>32;
304 Htable[j].lo = V.hi<<32|V.hi>>32;
/* Reduction table for the 4-bit path: rem_4bit[b] is the polynomial
 * correction for the 4 bits shifted out of the low end, pre-shifted
 * into the top 16 bits of a size_t by PACK. */
311 static const size_t rem_4bit[16] = {
312 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
313 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
314 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
315 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
/* Multiply Xi by H using the 16-entry table, one nibble at a time from
 * the last byte backwards (low nibble nlo, high nibble nhi per byte).
 * NOTE(review): listing is elided — the outer loop header, nhi
 * extraction and several braces are missing from this view. */
317 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
321 size_t rem, nlo, nhi;
322 const union { long one; char little; } is_endian = {1};
/* Seed Z from the low nibble of the last byte of Xi. */
324 nlo = ((const u8 *)Xi)[15];
328 Z.hi = Htable[nlo].hi;
329 Z.lo = Htable[nlo].lo;
/* Shift Z right 4 bits, reduce the dropped nibble via rem_4bit, then
 * fold in the table entry for the high nibble. */
332 rem = (size_t)Z.lo&0xf;
333 Z.lo = (Z.hi<<60)|(Z.lo>>4);
335 if (sizeof(size_t)==8)
336 Z.hi ^= rem_4bit[rem];
338 Z.hi ^= (u64)rem_4bit[rem]<<32;
340 Z.hi ^= Htable[nhi].hi;
341 Z.lo ^= Htable[nhi].lo;
/* Next byte of Xi (walking backwards via cnt). */
345 nlo = ((const u8 *)Xi)[cnt];
349 rem = (size_t)Z.lo&0xf;
350 Z.lo = (Z.hi<<60)|(Z.lo>>4);
352 if (sizeof(size_t)==8)
353 Z.hi ^= rem_4bit[rem];
355 Z.hi ^= (u64)rem_4bit[rem]<<32;
357 Z.hi ^= Htable[nlo].hi;
358 Z.lo ^= Htable[nlo].lo;
/* Write the result back into Xi in big-endian byte order. */
361 if (is_endian.little) {
363 Xi[0] = BSWAP8(Z.hi);
364 Xi[1] = BSWAP8(Z.lo);
367 v = (u32)(Z.hi>>32); PUTU32(p,v);
368 v = (u32)(Z.hi>>32); PUTU32(p,v);
369 v = (u32)(Z.hi); PUTU32(p+4,v);
370 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
371 v = (u32)(Z.lo); PUTU32(p+12,v);
380 #if !defined(OPENSSL_SMALL_FOOTPRINT)
382 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
383 * details... Compiler-generated code doesn't seem to give any
384 * performance improvement, at least not on x86[_64]. It's here
385 * mostly as reference and a placeholder for possible future
386 * non-trivial optimization[s]...
/* Hash `len` bytes of `inp` into Xi: per 16-byte block, XOR the block
 * into Xi (the XOR lines are elided here) and run the same nibble-wise
 * table multiplication as gcm_gmult_4bit.
 * NOTE(review): listing is elided — the block-XOR step, loop headers
 * and braces are missing from this view. */
388 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
389 const u8 *inp,size_t len)
393 size_t rem, nlo, nhi;
394 const union { long one; char little; } is_endian = {1};
398 nlo = ((const u8 *)Xi)[15];
403 Z.hi = Htable[nlo].hi;
404 Z.lo = Htable[nlo].lo;
/* Shift/reduce by 4 bits, then fold in the high-nibble entry. */
407 rem = (size_t)Z.lo&0xf;
408 Z.lo = (Z.hi<<60)|(Z.lo>>4);
410 if (sizeof(size_t)==8)
411 Z.hi ^= rem_4bit[rem];
413 Z.hi ^= (u64)rem_4bit[rem]<<32;
415 Z.hi ^= Htable[nhi].hi;
416 Z.lo ^= Htable[nhi].lo;
420 nlo = ((const u8 *)Xi)[cnt];
425 rem = (size_t)Z.lo&0xf;
426 Z.lo = (Z.hi<<60)|(Z.lo>>4);
428 if (sizeof(size_t)==8)
429 Z.hi ^= rem_4bit[rem];
431 Z.hi ^= (u64)rem_4bit[rem]<<32;
433 Z.hi ^= Htable[nlo].hi;
434 Z.lo ^= Htable[nlo].lo;
/* Store the per-block result back into Xi, big-endian. */
437 if (is_endian.little) {
439 Xi[0] = BSWAP8(Z.hi);
440 Xi[1] = BSWAP8(Z.lo);
444 v = (u32)(Z.hi>>32); PUTU32(p,v);
445 v = (u32)(Z.hi); PUTU32(p+4,v);
446 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
447 v = (u32)(Z.lo); PUTU32(p+12,v);
/* Advance to the next 16-byte block; caller guarantees len is a
 * multiple of 16 on this path. */
454 } while (inp+=16, len-=16);
/* GHASH_ASM build: the 4-bit routines are provided by assembly, so only
 * prototypes appear here. */
458 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
459 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
/* TABLE_BITS==4 bindings used by the bulk encrypt/decrypt paths. */
462 #define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
463 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
464 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
465 /* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
466 * thrashing effect. In other words idea is to hash data while it's
467 * still in L1 cache after encryption pass... */
468 #define GHASH_CHUNK 1024
471 #else /* TABLE_BITS */
/* Table-free bit-by-bit GF(2^128) multiplication: for each bit of Xi
 * (processed one `long` word at a time) conditionally XOR V into Z and
 * halve V with REDUCE1BIT.  Slowest option; used when TABLE_BITS==1.
 * NOTE(review): listing is elided — declarations of Z/V/X, the inner
 * accumulate step and several braces are missing from this view. */
473 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
478 const long *xi = (const long *)Xi;
479 const union { long one; char little; } is_endian = {1};
481 V.hi = H[0]; /* H is in host byte order, no byte swapping */
/* Load each `long` worth of Xi, converting to host bit order first. */
484 for (j=0; j<16/sizeof(long); ++j) {
485 if (is_endian.little) {
486 if (sizeof(long)==8) {
488 X = (long)(BSWAP8(xi[j]));
/* 32-bit long without BSWAP8: assemble from two 32-bit reads. */
490 const u8 *p = (const u8 *)(xi+j);
491 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
495 const u8 *p = (const u8 *)(xi+j);
/* M is an all-ones or all-zeros mask from the top bit of X, used to
 * make the conditional XOR branch-free. */
502 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
503 u64 M = (u64)(X>>(8*sizeof(long)-1));
/* Store the result back into Xi, big-endian. */
511 if (is_endian.little) {
513 Xi[0] = BSWAP8(Z.hi);
514 Xi[1] = BSWAP8(Z.lo);
518 v = (u32)(Z.hi>>32); PUTU32(p,v);
519 v = (u32)(Z.hi); PUTU32(p+4,v);
520 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
521 v = (u32)(Z.lo); PUTU32(p+12,v);
/* TABLE_BITS==1 binding; note it multiplies by ctx->H directly (no
 * precomputed table in this configuration). */
529 #define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/* GCM state: counter block Yi, key stream EKi, E(K,0) for the tag,
 * hash subkey H, accumulator Xi, lengths — plus the precomputed table
 * and (optionally) function pointers selected at init time.
 * NOTE(review): listing is elided — Xi, H, len, block and key members
 * are declared on lines not shown here. */
533 struct gcm128_context {
534 /* Following 6 names follow names in GCM specification */
535 union { u64 u[2]; u32 d[4]; u8 c[16]; } Yi,EKi,EK0,
537 /* Pre-computed table used by gcm_gmult_* */
/* Dispatch pointers filled in by CRYPTO_gcm128_init so asm variants
 * (CLMUL, MMX, ...) can be chosen at run time. */
542 void (*gmult)(u64 Xi[2],const u128 Htable[16]);
543 void (*ghash)(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
/* res = bytes of the current key-stream block already consumed. */
545 unsigned int res, pad;
/* x86/x86_64 assembly back-ends: PCLMULQDQ (clmul), MMX and plain x86
 * implementations, selected at run time via OPENSSL_ia32cap_P. */
550 #if TABLE_BITS==4 && defined(GHASH_ASM) && !defined(I386_ONLY) && \
551 (defined(__i386) || defined(__i386__) || \
552 defined(__x86_64) || defined(__x86_64__) || \
553 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
554 # define GHASH_ASM_IAX
555 extern unsigned int OPENSSL_ia32cap_P[2];
557 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
558 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
559 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
561 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
562 # define GHASH_ASM_X86
563 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
564 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
566 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
567 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
/* With run-time dispatch, GCM_MUL/GHASH go through the context's
 * function pointers instead of a fixed routine. */
571 # define GCM_MUL(ctx,Xi) (*((ctx)->gmult))(ctx->Xi.u,ctx->Htable)
573 # define GHASH(ctx,in,len) (*((ctx)->ghash))((ctx)->Xi.u,(ctx)->Htable,in,len)
/* Initialise a GCM context: zero the state, compute the hash subkey
 * H = E(K, 0^128), convert it to host byte order, and build the
 * multiplication table / pick the gmult/ghash implementation.
 * NOTE(review): listing is elided — storage of ctx->block/key and
 * several #if branches are on lines not shown here. */
576 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
578 const union { long one; char little; } is_endian = {1};
580 memset(ctx,0,sizeof(*ctx));
/* H = E(K, 0): ctx->H.c is all-zero after the memset above. */
584 (*block)(ctx->H.c,ctx->H.c,key);
586 if (is_endian.little) {
587 /* H is stored in host byte order */
589 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
590 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
/* No BSWAP8: assemble the two 64-bit halves from 32-bit reads. */
594 hi = (u64)GETU32(p) <<32|GETU32(p+4);
595 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
/* TABLE_BITS==8 configuration. */
602 gcm_init_8bit(ctx->Htable,ctx->H.u);
604 # if defined(GHASH_ASM_IAX)
/* Bit 1 of word 1 of OPENSSL_ia32cap_P gates the PCLMULQDQ path. */
605 if (OPENSSL_ia32cap_P[1]&(1<<1)) {
606 gcm_init_clmul(ctx->Htable,ctx->H.u);
607 ctx->gmult = gcm_gmult_clmul;
608 ctx->ghash = gcm_ghash_clmul;
611 gcm_init_4bit(ctx->Htable,ctx->H.u);
612 # if defined(GHASH_ASM_X86)
/* Bit 23 of word 0 (MMX) selects the MMX 4-bit routines on i386. */
613 if (OPENSSL_ia32cap_P[0]&(1<<23)) {
614 ctx->gmult = gcm_gmult_4bit_mmx;
615 ctx->ghash = gcm_ghash_4bit_mmx;
617 ctx->gmult = gcm_gmult_4bit_x86;
618 ctx->ghash = gcm_ghash_4bit_x86;
621 ctx->gmult = gcm_gmult_4bit;
622 ctx->ghash = gcm_ghash_4bit;
/* Non-dispatch configuration: just build the C 4-bit table. */
625 gcm_init_4bit(ctx->Htable,ctx->H.u);
/* Set the IV: a 96-bit IV becomes Yi = IV||0^31||1 (counter handling is
 * on elided lines); any other length is GHASHed into Yi together with
 * the 64-bit IV bit length, per the GCM spec.  Also computes
 * EK0 = E(K, Y0) for the final tag.
 * NOTE(review): listing is elided — state reset, the GCM_MUL calls
 * between IV blocks and the counter increment are not shown here. */
630 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
632 const union { long one; char little; } is_endian = {1};
/* Fast path: 96-bit IV copied directly into the counter block. */
644 memcpy(ctx->Yi.c,iv,12);
/* General path: absorb full 16-byte IV blocks... */
653 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
/* ...then the final partial block. */
659 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
/* Mix in the IV length in bits (len0), big-endian, into Yi[8..15]. */
663 if (is_endian.little) {
665 ctx->Yi.u[1] ^= BSWAP8(len0);
667 ctx->Yi.c[8] ^= (u8)(len0>>56);
668 ctx->Yi.c[9] ^= (u8)(len0>>48);
669 ctx->Yi.c[10] ^= (u8)(len0>>40);
670 ctx->Yi.c[11] ^= (u8)(len0>>32);
671 ctx->Yi.c[12] ^= (u8)(len0>>24);
672 ctx->Yi.c[13] ^= (u8)(len0>>16);
673 ctx->Yi.c[14] ^= (u8)(len0>>8);
674 ctx->Yi.c[15] ^= (u8)(len0);
/* Big-endian host: len0 already has the right byte order. */
678 ctx->Yi.u[1] ^= len0;
/* Extract the 32-bit counter from the last 4 bytes of Yi. */
682 if (is_endian.little)
683 ctr = GETU32(ctx->Yi.c+12);
/* EK0 = E(K, Y0), used by CRYPTO_gcm128_finish for the tag. */
688 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
/* Write the (incremented on an elided line) counter back into Yi. */
690 if (is_endian.little)
691 PUTU32(ctx->Yi.c+12,ctr);
/* Absorb additional authenticated data into the GHASH accumulator Xi;
 * len.u[0] tracks the total AAD byte count for the final length block.
 * NOTE(review): listing is elided — the GHASH/GCM_MUL calls between
 * these loops are on lines not shown here. */
696 void CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
700 ctx->len.u[0] += len;
/* Bulk-process the whole 16-byte blocks (len rounded down to 16). */
703 if ((i = (len&(size_t)-16))) {
710 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
/* Trailing partial block: XOR in the remaining bytes. */
717 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
/* CTR-encrypt `len` bytes of `in` into `out` and hash the ciphertext
 * into Xi.  Several strategies: resume a partial key-stream block,
 * GHASH_CHUNK-sized bulk path, 16-byte word-wise path, byte-wise tail,
 * and a SMALL_FOOTPRINT byte loop.
 * NOTE(review): listing is elided — `n` handling, counter increments,
 * GCM_MUL/GHASH calls and loop/brace structure are partially missing. */
722 void CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
723 const unsigned char *in, unsigned char *out,
726 const union { long one; char little; } is_endian = {1};
/* len.u[1] accumulates the total plaintext byte count. */
730 ctx->len.u[1] += len;
732 if (is_endian.little)
733 ctr = GETU32(ctx->Yi.c+12);
737 #if !defined(OPENSSL_SMALL_FOOTPRINT)
/* Finish a previously started key-stream block (ctx->res != 0). */
738 if (16%sizeof(size_t) == 0) do { /* always true actually */
741 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
745 if (n==0) GCM_MUL(ctx,Xi);
/* Word-wise paths require suitably aligned in/out pointers. */
751 #if defined(STRICT_ALIGNMENT)
752 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
/* Bulk path: encrypt GHASH_CHUNK bytes, then hash the just-written
 * ciphertext while it is still in L1 cache. */
755 #if defined(GHASH) && defined(GHASH_CHUNK)
756 while (len>=GHASH_CHUNK) {
757 size_t j=GHASH_CHUNK;
760 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
762 if (is_endian.little)
763 PUTU32(ctx->Yi.c+12,ctr);
766 for (i=0; i<16; i+=sizeof(size_t))
768 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
773 GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
/* Remaining whole 16-byte blocks (len rounded down to 16). */
776 if ((i = (len&(size_t)-16))) {
780 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
782 if (is_endian.little)
783 PUTU32(ctx->Yi.c+12,ctr);
786 for (i=0; i<16; i+=sizeof(size_t))
788 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* No-GHASH variant: XOR into Xi word-wise and GCM_MUL per block. */
797 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
799 if (is_endian.little)
800 PUTU32(ctx->Yi.c+12,ctr);
803 for (i=0; i<16; i+=sizeof(size_t))
804 *(size_t *)(ctx->Xi.c+i) ^=
806 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Tail: fewer than 16 bytes left — generate one more key-stream
 * block and process byte by byte (ctx->res records progress). */
814 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
816 if (is_endian.little)
817 PUTU32(ctx->Yi.c+12,ctr);
821 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
/* OPENSSL_SMALL_FOOTPRINT: plain byte loop, new key-stream block
 * whenever n wraps to 0. */
830 for (i=0;i<len;++i) {
832 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
834 if (is_endian.little)
835 PUTU32(ctx->Yi.c+12,ctr);
839 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
/* CTR-decrypt `len` bytes of `in` into `out`.  Mirrors
 * CRYPTO_gcm128_encrypt, but hashes the CIPHERTEXT (the input) into Xi
 * — note the bulk path calls GHASH on `in` before decrypting, and the
 * byte paths capture `c = in[...]` before writing out.
 * NOTE(review): listing is elided — `n` handling, counter increments
 * and loop/brace structure are partially missing from this view. */
848 void CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
849 const unsigned char *in, unsigned char *out,
852 const union { long one; char little; } is_endian = {1};
856 ctx->len.u[1] += len;
858 if (is_endian.little)
859 ctr = GETU32(ctx->Yi.c+12);
863 #if !defined(OPENSSL_SMALL_FOOTPRINT)
/* Finish a previously started key-stream block. */
864 if (16%sizeof(size_t) == 0) do { /* always true actually */
868 *(out++) = c^ctx->EKi.c[n];
873 if (n==0) GCM_MUL (ctx,Xi);
879 #if defined(STRICT_ALIGNMENT)
880 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
/* Bulk path: hash the incoming ciphertext first, then decrypt it. */
883 #if defined(GHASH) && defined(GHASH_CHUNK)
884 while (len>=GHASH_CHUNK) {
885 size_t j=GHASH_CHUNK;
887 GHASH(ctx,in,GHASH_CHUNK);
889 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
891 if (is_endian.little)
892 PUTU32(ctx->Yi.c+12,ctr);
895 for (i=0; i<16; i+=sizeof(size_t))
897 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Remaining whole 16-byte blocks. */
904 if ((i = (len&(size_t)-16))) {
907 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
909 if (is_endian.little)
910 PUTU32(ctx->Yi.c+12,ctr);
913 for (i=0; i<16; i+=sizeof(size_t))
915 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* No-GHASH variant: save the ciphertext word, decrypt, then XOR the
 * saved ciphertext into Xi. */
923 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
925 if (is_endian.little)
926 PUTU32(ctx->Yi.c+12,ctr);
929 for (i=0; i<16; i+=sizeof(size_t)) {
930 size_t c = *(size_t *)(in+i);
931 *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
932 *(size_t *)(ctx->Xi.c+i) ^= c;
/* Tail: byte-wise with ctx->res tracking progress. */
941 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
943 if (is_endian.little)
944 PUTU32(ctx->Yi.c+12,ctr);
950 out[n] = c^ctx->EKi.c[n];
/* OPENSSL_SMALL_FOOTPRINT byte loop. */
959 for (i=0;i<len;++i) {
962 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
964 if (is_endian.little)
965 PUTU32(ctx->Yi.c+12,ctr);
970 out[i] ^= ctx->EKi.c[n];
/* Finalise the tag: hash the 128-bit length block (AAD bits || data
 * bits) into Xi, then XOR in EK0 = E(K, Y0).  After this, ctx->Xi.c
 * holds the authentication tag.
 * NOTE(review): listing is elided — the trailing-block GCM_MUL, the
 * BSWAP8 branch and the final GCM_MUL are on lines not shown here. */
980 void CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx)
982 const union { long one; char little; } is_endian = {1};
/* Byte counts -> bit counts, as required by the GCM length block. */
983 u64 alen = ctx->len.u[0]<<3;
984 u64 clen = ctx->len.u[1]<<3;
/* On little-endian hosts the length words must be byte-swapped to
 * big-endian before being hashed. */
989 if (is_endian.little) {
996 ctx->len.u[0] = alen;
997 ctx->len.u[1] = clen;
/* No-BSWAP8 path: rebuild the big-endian values from 32-bit reads. */
999 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1000 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
/* Fold the length block into the hash... */
1004 ctx->Xi.u[0] ^= alen;
1005 ctx->Xi.u[1] ^= clen;
/* ...and mask with EK0 to produce the tag. */
1008 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1009 ctx->Xi.u[1] ^= ctx->EK0.u[1];
/* Self-test vectors.  The array names follow the GCM specification's
 * test cases: Kn = key, Pn = plaintext, An = AAD, IVn = nonce,
 * Cn = ciphertext, Tn = tag for "Test Case n" (1..18, covering AES-128,
 * AES-192 and AES-256 with 96-bit, 64-bit and 480-bit IVs).
 * NOTE(review): listing is elided — several K/P/A/IV declarations are
 * on lines not shown here; empty-initialised arrays (e.g. K1[16]) are
 * deliberately all-zero vectors. */
1012 #if defined(SELFTEST)
1014 #include <openssl/aes.h>
1017 static const u8 K1[16],
1022 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
1028 static const u8 P2[16],
1029 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1030 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1034 static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1035 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1036 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1037 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1038 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1039 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1040 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1041 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1042 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1043 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1044 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4,};
1049 static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1050 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1051 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1052 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1053 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1054 0xab,0xad,0xda,0xd2},
1055 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1056 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1057 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1058 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1059 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1064 static const u8 A5[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1065 0xab,0xad,0xda,0xd2},
1066 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1067 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1068 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1069 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1070 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1071 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
1077 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1078 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1079 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1080 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1081 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1082 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1083 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1084 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1085 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
/* AES-192 cases (K7 is 24 bytes). */
1088 static const u8 K7[24],
1093 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1099 static const u8 P8[16],
1100 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1101 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1105 static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1106 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1107 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1108 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1109 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1110 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1111 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1112 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1113 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1114 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1115 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1116 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1121 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1122 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1123 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1124 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1125 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1126 0xab,0xad,0xda,0xd2},
1127 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1128 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1129 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1130 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1131 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1137 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1138 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1139 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1140 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1141 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1142 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1148 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1149 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1150 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1151 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1152 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1153 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1154 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1155 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1156 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
/* AES-256 cases (K13 is 32 bytes). */
1159 static const u8 K13[32],
1164 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1169 static const u8 P14[16],
1171 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1172 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1176 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1177 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1178 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1179 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1180 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1181 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1182 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1183 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1184 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1185 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1186 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1187 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1192 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1193 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1194 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1195 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1196 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1197 0xab,0xad,0xda,0xd2},
1198 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1199 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1200 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1201 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1202 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1208 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1209 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1210 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1211 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1212 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1213 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1219 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1220 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1221 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1222 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1223 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1224 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1225 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1226 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1227 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
/* Run one self-test case: encrypt Pn and check ciphertext Cn and tag
 * Tn, then decrypt Cn and check Pn and Tn.  Relies on `key`, `ctx` and
 * `ret` being in scope at the expansion site; the `if (A##n)` / `if
 * (P##n)` tests rely on the array names being non-null addresses, so
 * cases with no AAD/plaintext use zero-length handling upstream. */
1229 #define TEST_CASE(n) do { \
1230 u8 out[sizeof(P##n)]; \
1231 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
1232 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
1233 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1234 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1235 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
1236 CRYPTO_gcm128_finish(&ctx); \
1237 if (memcmp(ctx.Xi.c,T##n,16) || (C##n && memcmp(out,C##n,sizeof(out)))) \
1238 ret++, printf ("encrypt test#%d failed.\n",n);\
1239 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1240 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1241 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
1242 CRYPTO_gcm128_finish(&ctx); \
1243 if (memcmp(ctx.Xi.c,T##n,16) || (P##n && memcmp(out,P##n,sizeof(out)))) \
1244 ret++, printf ("decrypt test#%d failed.\n",n);\
/* Cycle-count micro-benchmark (presumably inside the self-test main(),
 * whose definition is on lines not shown here): times GCM encryption
 * vs plain CTR over a 1KB buffer, then GHASH alone, using the CPU
 * timestamp counter. */
1272 #ifdef OPENSSL_CPUID_OBJ
1274 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1275 union { u64 u; u8 c[1024]; } buf;
1278 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1279 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1280 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
/* Warm-up pass before timing. */
1282 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1283 start = OPENSSL_rdtsc();
1284 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1285 gcm_t = OPENSSL_rdtsc() - start;
/* Same measurement for raw CTR mode, to isolate the GHASH cost. */
1287 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1288 &key,ctx.Yi.c,ctx.EKi.c,&ctx.res,
1289 (block128_f)AES_encrypt);
1290 start = OPENSSL_rdtsc();
1291 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1292 &key,ctx.Yi.c,ctx.EKi.c,&ctx.res,
1293 (block128_f)AES_encrypt);
1294 ctr_t = OPENSSL_rdtsc() - start;
/* Report cycles/byte: GCM, CTR, and their difference (GHASH cost). */
1296 printf("%.2f-%.2f=%.2f\n",
1297 gcm_t/(double)sizeof(buf),
1298 ctr_t/(double)sizeof(buf),
1299 (gcm_t-ctr_t)/(double)sizeof(buf));
/* Time GHASH alone, averaged over 100 iterations. */
1301 GHASH(&ctx,buf.c,sizeof(buf));
1302 start = OPENSSL_rdtsc();
1303 for (i=0;i<100;++i) GHASH(&ctx,buf.c,sizeof(buf));
1304 gcm_t = OPENSSL_rdtsc() - start;
1305 printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);