1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
50 #include "modes_lcl.h"
/* 128-bit quantity used to hold the GHASH state and pre-computed
 * GF(2^128) multiples of the hash key H. */
60 typedef struct { u64 hi,lo; } u128;
62 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
63 /* redefine, because alignment is ensured */
/* Big-endian 32-bit load/store via the byte-swap intrinsic; the cast
 * is safe here because the callers guarantee aligned pointers when
 * STRICT_ALIGNMENT is in effect. */
65 #define GETU32(p) BSWAP4(*(const u32 *)(p))
67 #define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
/* Place a 16-bit reduction constant in the top 16 bits of a size_t,
 * so the same rem_*bit tables serve both 32- and 64-bit size_t. */
70 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
75 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
76 * never be set to 8. 8 is effectively reserved for testing purposes.
77 * Under ideal conditions "8-bit" version should be twice as fast as
78 * "4-bit" one. But world is far from ideal. For gcc-generated x86 code,
79 * "8-bit" was observed to run only ~50% faster. On x86_64 observed
80 * improvement was ~75%, much closer to optimal, but the fact of
81 * deviation means that references to pre-computed tables end up on
82 * critical path and as tables are pretty big, 4KB per key+1KB shared,
83 * execution time is sensitive to cache timing. It's not actually
84 * proven, but 4-bit procedure is believed to provide adequate
85 * all-round performance...
/* Pre-compute the 256-entry table Htable[i] = i*H in GF(2^128) used by
 * the "8-bit" GHASH flavour.
 * NOTE(review): several original lines are missing from this excerpt
 * (declarations of i, j, V and the initial Htable[0]/V setup). */
91 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
/* Compute Htable[64], [32], ... [1] by repeatedly halving V in
 * GF(2^128) (shift right, conditionally fold in the polynomial). */
101 for (Htable[128]=V, i=64; i>0; i>>=1) {
102 if (sizeof(size_t)==8) {
/* 0xe1...00 is the GCM reduction polynomial; the mask 0-(V.lo&1)
 * applies it only when a 1 bit is shifted out. */
103 u64 T = U64(0xe100000000000000) & (0-(V.lo&1));
104 V.lo = (V.hi<<63)|(V.lo>>1);
/* 32-bit-size_t variant of the same shift-and-reduce step. */
108 u32 T = 0xe1000000U & (0-(u32)(V.lo&1));
109 V.lo = (V.hi<<63)|(V.lo>>1);
110 V.hi = (V.hi>>1 )^((u64)T<<32);
/* Fill the remaining entries: Htable[i+j] = Htable[i] ^ Htable[j]
 * (XOR is addition in GF(2^128)). */
115 for (i=2; i<256; i<<=1) {
116 u128 *Hi = Htable+i, H0 = *Hi;
117 for (j=1; j<i; ++j) {
118 Hi[j].hi = H0.hi^Htable[j].hi;
119 Hi[j].lo = H0.lo^Htable[j].lo;
/* Multiply Xi by H in GF(2^128) using the 8-bit table: one 256-entry
 * table lookup per byte of Xi, walking Xi from its last byte to its
 * first.  The result is written back to Xi in big-endian byte order.
 * NOTE(review): the loop header, n extraction and some declarations
 * are on lines missing from this excerpt. */
124 static void gcm_gmult_8bit(u64 Xi[2], u128 Htable[256])
127 const u8 *xi = (const u8 *)Xi+15;
/* Runtime endianness probe: .little is non-zero on little-endian. */
129 const union { long one; char little; } is_endian = {1};
/* Reduction constants for the 8 bits shifted out per iteration,
 * pre-positioned in the top 16 bits of size_t by PACK (see above). */
130 static const size_t rem_8bit[256] = {
131 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
132 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
133 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
134 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
135 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
136 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
137 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
138 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
139 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
140 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
141 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
142 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
143 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
144 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
145 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
146 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
147 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
148 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
149 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
150 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
151 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
152 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
153 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
154 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
155 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
156 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
157 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
158 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
159 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
160 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
161 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
162 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
163 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
164 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
165 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
166 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
167 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
168 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
169 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
170 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
171 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
172 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
173 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
174 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
175 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
176 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
177 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
178 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
179 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
180 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
181 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
182 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
183 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
184 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
185 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
186 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
187 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
188 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
189 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
190 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
191 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
192 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
193 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
194 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
/* XOR in the table entry selected by the current byte n. */
197 Z.hi ^= Htable[n].hi;
198 Z.lo ^= Htable[n].lo;
/* Stop once the first byte of Xi has been consumed. */
200 if ((u8 *)Xi==xi) break;
/* Shift Z right by 8 bits and fold the shifted-out byte back in
 * via the rem_8bit reduction table. */
204 rem = (size_t)Z.lo&0xff;
205 Z.lo = (Z.hi<<56)|(Z.lo>>8);
207 if (sizeof(size_t)==8)
208 Z.hi ^= rem_8bit[rem];
/* With 32-bit size_t the constant sits in the low half; realign. */
210 Z.hi ^= (u64)rem_8bit[rem]<<32;
/* Store Z back into Xi as a big-endian byte string. */
213 if (is_endian.little) {
215 Xi[0] = BSWAP8(Z.hi);
216 Xi[1] = BSWAP8(Z.lo);
220 v = (u32)(Z.hi>>32); PUTU32(p,v);
221 v = (u32)(Z.hi); PUTU32(p+4,v);
222 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
223 v = (u32)(Z.lo); PUTU32(p+12,v);
/* TABLE_BITS==8: GCM_MUL maps to the 8-bit routine above. */
231 #define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
/* Pre-compute the 16-entry table Htable[i] = i*H in GF(2^128) used by
 * the "4-bit" (nibble-at-a-time) GHASH flavour.
 * NOTE(review): variable declarations and several control-flow lines
 * are missing from this excerpt; the REDUCE macro below is also cut
 * short (its closing lines are not visible). */
235 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
238 #if defined(OPENSSL_SMALL_FOOTPRINT)
/* REDUCE(V): one right-shift-and-reduce step in GF(2^128); the mask
 * 0-(V.lo&1) folds in the GCM polynomial only when a 1 bit is
 * shifted out. */
241 #define REDUCE(V) do { \
242 if (sizeof(size_t)==8) { \
243 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
244 V.lo = (V.hi<<63)|(V.lo>>1); \
245 V.hi = (V.hi>>1 )^T; \
248 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
249 V.lo = (V.hi<<63)|(V.lo>>1); \
250 V.hi = (V.hi>>1 )^((u64)T<<32); \
259 #if defined(OPENSSL_SMALL_FOOTPRINT)
/* Small-footprint path: build Htable[8],[4],[2],[1] by halving, then
 * combine with XOR (addition in GF(2^128)). */
260 for (Htable[8]=V, i=4; i>0; i>>=1) {
265 for (i=2; i<16; i<<=1) {
268 for (V=*Hi, j=1; j<i; ++j) {
269 Hi[j].hi = V.hi^Htable[j].hi;
270 Hi[j].lo = V.lo^Htable[j].lo;
/* Unrolled path: same table built with explicit XOR combinations. */
281 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
283 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
284 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
285 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
287 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
288 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
289 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
290 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
291 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
292 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
293 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
295 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
297 * ARM assembler expects specific dword order in Htable.
301 const union { long one; char little; } is_endian = {1};
303 if (is_endian.little)
/* Swap the 32-bit halves of each dword for the ARM asm routines. */
312 Htable[j].hi = V.lo<<32|V.lo>>32;
313 Htable[j].lo = V.hi<<32|V.hi>>32;
/* Reduction constants for the 4 bits shifted out per GHASH step,
 * pre-positioned in the top 16 bits of size_t by PACK. */
321 static const size_t rem_4bit[16] = {
322 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
323 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
324 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
325 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
/* One GF(2^128) multiplication Xi := Xi * H using the 4-bit table:
 * two table lookups per byte of Xi (low nibble, then high nibble).
 * The result is written back to Xi in big-endian byte order.
 * NOTE(review): the byte-loop framing (cnt handling, nhi extraction)
 * is on lines missing from this excerpt. */
327 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
331 size_t rem, nlo, nhi;
332 const union { long one; char little; } is_endian = {1};
334 nlo = ((const u8 *)Xi)[15];
/* Seed Z with the table entry for the lowest nibble. */
338 Z.hi = Htable[nlo].hi;
339 Z.lo = Htable[nlo].lo;
/* Shift Z right 4 bits; fold the shifted-out nibble back in via the
 * rem_4bit reduction table. */
342 rem = (size_t)Z.lo&0xf;
343 Z.lo = (Z.hi<<60)|(Z.lo>>4);
345 if (sizeof(size_t)==8)
346 Z.hi ^= rem_4bit[rem];
348 Z.hi ^= (u64)rem_4bit[rem]<<32;
350 Z.hi ^= Htable[nhi].hi;
351 Z.lo ^= Htable[nhi].lo;
355 nlo = ((const u8 *)Xi)[cnt];
/* Same shift-and-reduce step for the low nibble of this byte. */
359 rem = (size_t)Z.lo&0xf;
360 Z.lo = (Z.hi<<60)|(Z.lo>>4);
362 if (sizeof(size_t)==8)
363 Z.hi ^= rem_4bit[rem];
365 Z.hi ^= (u64)rem_4bit[rem]<<32;
367 Z.hi ^= Htable[nlo].hi;
368 Z.lo ^= Htable[nlo].lo;
/* Write Z back to Xi in big-endian byte order. */
371 if (is_endian.little) {
373 Xi[0] = BSWAP8(Z.hi);
374 Xi[1] = BSWAP8(Z.lo);
378 v = (u32)(Z.hi>>32); PUTU32(p,v);
379 v = (u32)(Z.hi); PUTU32(p+4,v);
380 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
381 v = (u32)(Z.lo); PUTU32(p+12,v);
/* Streamed 4-bit GHASH over a whole buffer: one 16-byte block per
 * outer iteration, avoiding a call per block. */
390 #if !defined(OPENSSL_SMALL_FOOTPRINT)
392 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
393 * details... Compiler-generated code doesn't seem to give any
394 * performance improvement, at least not on x86[_64]. It's here
395 * mostly as reference and a placeholder for possible future
396 * non-trivial optimization[s]...
398 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
399 const u8 *inp,size_t len)
403 size_t rem, nlo, nhi;
404 const union { long one; char little; } is_endian = {1};
/* NOTE(review): the per-block XOR of inp into Xi and the loop
 * framing are on lines missing from this excerpt. */
408 nlo = ((const u8 *)Xi)[15];
413 Z.hi = Htable[nlo].hi;
414 Z.lo = Htable[nlo].lo;
/* Same shift-and-reduce inner steps as gcm_gmult_4bit. */
417 rem = (size_t)Z.lo&0xf;
418 Z.lo = (Z.hi<<60)|(Z.lo>>4);
420 if (sizeof(size_t)==8)
421 Z.hi ^= rem_4bit[rem];
423 Z.hi ^= (u64)rem_4bit[rem]<<32;
425 Z.hi ^= Htable[nhi].hi;
426 Z.lo ^= Htable[nhi].lo;
430 nlo = ((const u8 *)Xi)[cnt];
435 rem = (size_t)Z.lo&0xf;
436 Z.lo = (Z.hi<<60)|(Z.lo>>4);
438 if (sizeof(size_t)==8)
439 Z.hi ^= rem_4bit[rem];
441 Z.hi ^= (u64)rem_4bit[rem]<<32;
443 Z.hi ^= Htable[nlo].hi;
444 Z.lo ^= Htable[nlo].lo;
/* Write the block result back to Xi, big-endian. */
447 if (is_endian.little) {
449 Xi[0] = BSWAP8(Z.hi);
450 Xi[1] = BSWAP8(Z.lo);
454 v = (u32)(Z.hi>>32); PUTU32(p,v);
455 v = (u32)(Z.hi); PUTU32(p+4,v);
456 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
457 v = (u32)(Z.lo); PUTU32(p+12,v);
/* Advance to the next 16-byte block until the buffer is consumed;
 * caller is expected to pass len as a multiple of 16. */
464 } while (inp+=16, len-=16);
/* Prototypes for the assembly implementations selected when
 * GHASH_ASM is defined; same contracts as the C versions above. */
468 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
469 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
/* TABLE_BITS==4: GCM_MUL/GHASH map to the 4-bit routines. */
472 #define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
473 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
474 #define GHASH(in,len,ctx) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
475 /* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
476 * trashing effect. In other words idea is to hash data while it's
477 * still in L1 cache after encryption pass... */
478 #define GHASH_CHUNK 1024
481 #else /* TABLE_BITS */
/* Bit-at-a-time GF(2^128) multiplication: no pre-computed table,
 * smallest code, slowest.  Xi := Xi * H, result stored back in
 * big-endian byte order.
 * NOTE(review): several lines (Z/V.lo initialisation, the big-endian
 * load branch, the Z accumulation using M) are missing from this
 * excerpt. */
483 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
488 const long *xi = (const long *)Xi;
489 const union { long one; char little; } is_endian = {1};
491 V.hi = H[0]; /* H is in host byte order, no byte swapping */
/* Consume Xi one machine word at a time, loading each word into X
 * so its most significant bit is processed first. */
494 for (j=0; j<16/sizeof(long); ++j) {
495 if (is_endian.little) {
496 if (sizeof(long)==8) {
498 X = (long)(BSWAP8(xi[j]));
500 const u8 *p = (const u8 *)(xi+j);
501 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
505 const u8 *p = (const u8 *)(xi+j);
512 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
/* M is all-ones when the current (top) bit of X is set; used to
 * conditionally accumulate V into the product. */
513 u64 M = (u64)(X>>(8*sizeof(long)-1));
/* Halve V in GF(2^128): shift right, conditionally reduce. */
517 if (sizeof(size_t)==8) {
518 u64 T = U64(0xe100000000000000) & (0-(V.lo&1));
519 V.lo = (V.hi<<63)|(V.lo>>1);
523 u32 T = 0xe1000000U & (0-(u32)(V.lo&1));
524 V.lo = (V.hi<<63)|(V.lo>>1);
525 V.hi = (V.hi>>1 )^((u64)T<<32);
/* Store the accumulated product back into Xi, big-endian. */
531 if (is_endian.little) {
533 Xi[0] = BSWAP8(Z.hi);
534 Xi[1] = BSWAP8(Z.lo);
538 v = (u32)(Z.hi>>32); PUTU32(p,v);
539 v = (u32)(Z.hi); PUTU32(p+4,v);
540 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
541 v = (u32)(Z.lo); PUTU32(p+12,v);
/* TABLE_BITS==1: GCM_MUL maps to the bitwise routine. */
549 #define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/* GCM working state.  NOTE(review): several members (Xi, H, len,
 * the Htable array, the block/key pointers) are declared on lines
 * missing from this excerpt. */
553 struct gcm128_context {
554 /* Following 6 names follow names in GCM specification */
555 union { u64 u[2]; u32 d[4]; u8 c[16]; } Yi,EKi,EK0,
557 /* Pre-computed table used by gcm_gmult_* */
/* res: bytes of the current keystream block already consumed. */
563 unsigned int res, pad;
/* Initialise ctx for a cipher key: compute the hash key H as the
 * encryption of the all-zero block, store it in host byte order, and
 * pre-compute the GHASH table for the compiled-in TABLE_BITS. */
568 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
570 const union { long one; char little; } is_endian = {1};
572 memset(ctx,0,sizeof(*ctx));
/* H = E_K(0^128); ctx->H.c was zeroed by the memset above. */
576 (*block)(ctx->H.c,ctx->H.c,key);
578 if (is_endian.little) {
579 /* H is stored in host byte order */
581 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
582 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
/* Fallback byte-wise big-endian load when BSWAP8 is unavailable. */
586 hi = (u64)GETU32(p) <<32|GETU32(p+4);
587 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
/* Build the multiplication table for the selected GHASH flavour. */
594 gcm_init_8bit(ctx->Htable,ctx->H.u);
596 gcm_init_4bit(ctx->Htable,ctx->H.u);
/* Set the IV/nonce.  A 96-bit IV is used directly as Y0; any other
 * length is GHASHed into Y0 together with its bit length, per the
 * GCM specification.
 * NOTE(review): counter initialisation, the GCM_MUL calls in the
 * non-96-bit path and loop framing are on lines missing from this
 * excerpt. */
600 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
602 const union { long one; char little; } is_endian = {1};
/* Fast path: 96-bit IV copied verbatim into Y0. */
614 memcpy(ctx->Yi.c,iv,12);
/* Longer IV: fold full 16-byte chunks into Yi... */
623 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
/* ...then any remaining tail bytes. */
629 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
/* XOR in the IV bit length (len0) as the final GHASH block. */
633 if (is_endian.little) {
635 ctx->Yi.u[1] ^= BSWAP8(len0);
637 ctx->Yi.c[8] ^= (u8)(len0>>56);
638 ctx->Yi.c[9] ^= (u8)(len0>>48);
639 ctx->Yi.c[10] ^= (u8)(len0>>40);
640 ctx->Yi.c[11] ^= (u8)(len0>>32);
641 ctx->Yi.c[12] ^= (u8)(len0>>24);
642 ctx->Yi.c[13] ^= (u8)(len0>>16);
643 ctx->Yi.c[14] ^= (u8)(len0>>8);
644 ctx->Yi.c[15] ^= (u8)(len0);
/* Big-endian host: length already has the right byte order. */
648 ctx->Yi.u[1] ^= len0;
/* Extract the 32-bit counter from the last 4 bytes of Y0. */
652 if (is_endian.little)
653 ctr = GETU32(ctx->Yi.c+12);
/* EK0 = E_K(Y0), retained for the final tag computation. */
658 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
660 if (is_endian.little)
661 PUTU32(ctx->Yi.c+12,ctr);
/* Absorb additional authenticated data into the GHASH state Xi (no
 * encryption).  NOTE(review): the 16-byte-block loop framing and the
 * GCM_MUL calls are on lines missing from this excerpt. */
666 void CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
/* Track total AAD length for the final length block. */
670 ctx->len.u[0] += len;
/* Process whole 16-byte blocks first... */
673 if ((i = (len&(size_t)-16))) {
680 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
/* ...then the partial tail block. */
687 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
/* CTR-mode encrypt `len` bytes from `in` to `out`, folding the
 * produced ciphertext into the GHASH state Xi as it goes.
 * NOTE(review): many lines (counter increments, loop bounds, closing
 * braces, the n/mres bookkeeping) are missing from this excerpt. */
692 void CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
693 const unsigned char *in, unsigned char *out,
696 const union { long one; char little; } is_endian = {1};
/* Track total ciphertext length for the final length block. */
700 ctx->len.u[1] += len;
702 if (is_endian.little)
703 ctr = GETU32(ctx->Yi.c+12);
707 #if !defined(OPENSSL_SMALL_FOOTPRINT)
708 if (16%sizeof(size_t) == 0) do { /* always true actually */
/* Drain a partially consumed keystream block (ctx->res != 0). */
711 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
715 if (n==0) GCM_MUL(ctx,Xi);
/* Word-at-a-time fast path requires aligned in/out pointers. */
721 #if defined(STRICT_ALIGNMENT)
722 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
/* Encrypt GHASH_CHUNK bytes, then hash the output while it is
 * presumably still in L1 cache (see GHASH_CHUNK comment above). */
725 #if defined(GHASH) && defined(GHASH_CHUNK)
726 while (len>=GHASH_CHUNK) {
727 size_t j=GHASH_CHUNK;
730 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
732 if (is_endian.little)
733 PUTU32(ctx->Yi.c+12,ctr);
736 for (i=0; i<16; i+=sizeof(size_t))
738 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
743 GHASH(out-GHASH_CHUNK,GHASH_CHUNK,ctx);
/* Remaining whole 16-byte blocks. */
746 if ((i = (len&(size_t)-16))) {
750 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
752 if (is_endian.little)
753 PUTU32(ctx->Yi.c+12,ctr);
756 for (i=0; i<16; i+=sizeof(size_t))
758 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Non-GHASH variant: fold each block into Xi via GCM_MUL. */
767 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
769 if (is_endian.little)
770 PUTU32(ctx->Yi.c+12,ctr);
773 for (i=0; i<16; i+=sizeof(size_t))
774 *(size_t *)(ctx->Xi.c+i) ^=
776 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Trailing partial block: fresh keystream, byte-at-a-time. */
784 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
786 if (is_endian.little)
787 PUTU32(ctx->Yi.c+12,ctr);
791 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
/* Small-footprint fallback: strictly byte-at-a-time. */
800 for (i=0;i<len;++i) {
802 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
804 if (is_endian.little)
805 PUTU32(ctx->Yi.c+12,ctr);
809 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
/* CTR-mode decrypt: same keystream generation as encrypt, but the
 * *ciphertext* (the input) is what gets folded into Xi — hence input
 * is hashed (or saved) before being XORed with the keystream.
 * NOTE(review): counter increments, loop bounds and closing braces
 * are on lines missing from this excerpt. */
818 void CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
819 const unsigned char *in, unsigned char *out,
822 const union { long one; char little; } is_endian = {1};
/* Track total ciphertext length for the final length block. */
826 ctx->len.u[1] += len;
828 if (is_endian.little)
829 ctr = GETU32(ctx->Yi.c+12);
833 #if !defined(OPENSSL_SMALL_FOOTPRINT)
834 if (16%sizeof(size_t) == 0) do { /* always true actually */
/* Drain a partially consumed keystream block. */
838 *(out++) = c^ctx->EKi.c[n];
843 if (n==0) GCM_MUL (ctx,Xi);
849 #if defined(STRICT_ALIGNMENT)
850 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
/* Hash the ciphertext chunk first (it is the GHASH input), then
 * decrypt it in place or to out. */
853 #if defined(GHASH) && defined(GHASH_CHUNK)
854 while (len>=GHASH_CHUNK) {
855 size_t j=GHASH_CHUNK;
857 GHASH(in,GHASH_CHUNK,ctx);
859 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
861 if (is_endian.little)
862 PUTU32(ctx->Yi.c+12,ctr);
865 for (i=0; i<16; i+=sizeof(size_t))
867 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Remaining whole 16-byte blocks. */
874 if ((i = (len&(size_t)-16))) {
877 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
879 if (is_endian.little)
880 PUTU32(ctx->Yi.c+12,ctr);
883 for (i=0; i<16; i+=sizeof(size_t))
885 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* GCM_MUL variant: copy the ciphertext word c before writing out,
 * which keeps in == out (in-place decryption) correct. */
893 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
895 if (is_endian.little)
896 PUTU32(ctx->Yi.c+12,ctr);
899 for (i=0; i<16; i+=sizeof(size_t)) {
900 size_t c = *(size_t *)(in+i);
901 *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
902 *(size_t *)(ctx->Xi.c+i) ^= c;
/* Trailing partial block. */
911 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
913 if (is_endian.little)
914 PUTU32(ctx->Yi.c+12,ctr);
920 out[n] = c^ctx->EKi.c[n];
/* Small-footprint fallback: byte-at-a-time. */
929 for (i=0;i<len;++i) {
932 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
934 if (is_endian.little)
935 PUTU32(ctx->Yi.c+12,ctr);
940 out[i] ^= ctx->EKi.c[n];
/* Finalise GHASH: fold in the 64-bit bit-lengths of the AAD and the
 * ciphertext, then XOR with EK0 = E_K(Y0) to leave the authentication
 * tag in ctx->Xi.  NOTE(review): the final GCM_MUL call and parts of
 * the endianness handling are on lines missing from this excerpt. */
950 void CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx)
952 const union { long one; char little; } is_endian = {1};
/* Convert byte counts to bit counts, as the GCM length block uses
 * lengths in bits. */
953 u64 alen = ctx->len.u[0]<<3;
954 u64 clen = ctx->len.u[1]<<3;
959 if (is_endian.little) {
966 ctx->len.u[0] = alen;
967 ctx->len.u[1] = clen;
/* Fallback big-endian load when BSWAP8 is unavailable. */
969 alen = (u64)GETU32(p) <<32|GETU32(p+4);
970 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
/* XOR the length block into Xi (followed by a final GCM_MUL). */
974 ctx->Xi.u[0] ^= alen;
975 ctx->Xi.u[1] ^= clen;
/* Tag = GHASH result XOR E_K(Y0). */
978 ctx->Xi.u[0] ^= ctx->EK0.u[0];
979 ctx->Xi.u[1] ^= ctx->EK0.u[1];
982 #if defined(SELFTEST)
984 #include <openssl/aes.h>
987 static const u8 K1[16],
992 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
998 static const u8 P2[16],
999 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1000 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1004 static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1005 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1006 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1007 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1008 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1009 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1010 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1011 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1012 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1013 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1014 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4,};
1019 static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1020 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1021 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1022 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1023 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1024 0xab,0xad,0xda,0xd2},
1025 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1026 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1027 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1028 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1029 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1034 static const u8 A5[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1035 0xab,0xad,0xda,0xd2},
1036 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1037 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1038 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1039 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1040 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1041 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
1047 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1048 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1049 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1050 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1051 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1052 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1053 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1054 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1055 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1058 static const u8 K7[24],
1063 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1069 static const u8 P8[16],
1070 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1071 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1075 static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1076 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1077 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1078 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1079 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1080 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1081 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1082 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1083 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1084 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1085 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1086 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1091 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1092 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1093 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1094 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1095 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1096 0xab,0xad,0xda,0xd2},
1097 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1098 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1099 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1100 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1101 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1107 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1108 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1109 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1110 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1111 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1112 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1118 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1119 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1120 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1121 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1122 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1123 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1124 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1125 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1126 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1129 static const u8 K13[32],
1134 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1139 static const u8 P14[16],
1141 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1142 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1146 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1147 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1148 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1149 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1150 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1151 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1152 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1153 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1154 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1155 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1156 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1157 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1162 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1163 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1164 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1165 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1166 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1167 0xab,0xad,0xda,0xd2},
1168 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1169 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1170 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1171 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1172 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1178 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1179 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1180 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1181 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1182 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1183 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1189 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1190 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1191 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1192 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1193 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1194 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1195 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1196 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1197 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1199 #define TEST_CASE(n) do { \
1200 u8 out[sizeof(P##n)]; \
1201 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
1202 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
1203 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1204 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1205 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
1206 CRYPTO_gcm128_finish(&ctx); \
1207 if (memcmp(ctx.Xi.c,T##n,16) || (C##n && memcmp(out,C##n,sizeof(out)))) \
1208 ret++, printf ("encrypt test#%d failed.\n",n);\
1209 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1210 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1211 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
1212 CRYPTO_gcm128_finish(&ctx); \
1213 if (memcmp(ctx.Xi.c,T##n,16) || (P##n && memcmp(out,P##n,sizeof(out)))) \
1214 ret++, printf ("decrypt test#%d failed.\n",n);\
1242 #ifdef OPENSSL_CPUID_OBJ
1244 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1245 union { u64 u; u8 c[1024]; } buf;
1247 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1248 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1249 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1251 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1252 start = OPENSSL_rdtsc();
1253 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1254 gcm_t = OPENSSL_rdtsc() - start;
1256 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1257 &key,ctx.Yi.c,ctx.EKi.c,&ctx.res,
1258 (block128_f)AES_encrypt);
1259 start = OPENSSL_rdtsc();
1260 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1261 &key,ctx.Yi.c,ctx.EKi.c,&ctx.res,
1262 (block128_f)AES_encrypt);
1263 ctr_t = OPENSSL_rdtsc() - start;
1265 printf("%.2f-%.2f=%.2f\n",
1266 gcm_t/(double)sizeof(buf),
1267 ctr_t/(double)sizeof(buf),
1268 (gcm_t-ctr_t)/(double)sizeof(buf));
1270 GHASH(buf.c,sizeof(buf),&ctx);
1271 start = OPENSSL_rdtsc();
1272 GHASH(buf.c,sizeof(buf),&ctx);
1273 gcm_t = OPENSSL_rdtsc() - start;
1274 printf("%.2f\n",gcm_t/(double)sizeof(buf));