1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
50 #define OPENSSL_FIPSAPI
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
/* When both BSWAP4 and STRICT_ALIGNMENT are defined, GETU32/PUTU32 are
 * redefined here: GETU32 loads a u32 through a pointer cast and passes it
 * through BSWAP4; PUTU32 stores BSWAP4(v) through a pointer cast. */
63 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64 /* redefine, because alignment is ensured */
66 #define GETU32(p) BSWAP4(*(const u32 *)(p))
68 #define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
/* PACK(s): move the value s into the topmost 16 bits of a size_t. */
71 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
/* REDUCE1BIT(V): shift the 128-bit pair V.hi:V.lo right by one bit; when the
 * bit that was in V.lo&1 is set, 0xe1 is XORed into the top byte of V.hi.
 * The mask T is all-ones or zero, derived from 0-(V.lo&1).
 * Two forms are visible: one under sizeof(size_t)==8 using a 64-bit constant,
 * and one building the same mask from a 32-bit constant shifted left by 32. */
72 #define REDUCE1BIT(V) do { \
73 if (sizeof(size_t)==8) { \
74 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75 V.lo = (V.hi<<63)|(V.lo>>1); \
76 V.hi = (V.hi>>1 )^T; \
79 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80 V.lo = (V.hi<<63)|(V.lo>>1); \
81 V.hi = (V.hi>>1 )^((u64)T<<32); \
86 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87 * never be set to 8. 8 is effectively reserved for testing purposes.
88 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90 * whole spectrum of possible table driven implementations. Why? In
91 * non-"Shoup's" case memory access pattern is segmented in such manner,
92 * that it's trivial to see that cache timing information can reveal
93 * fair portion of intermediate hash value. Given that ciphertext is
94 * always available to attacker, it's possible for him to attempt to
95 * deduce secret parameter H and if successful, tamper with messages
96 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97 * not as trivial, but there is no reason to believe that it's resistant
98 * to cache-timing attack. And the thing about "8-bit" implementation is
99 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100 * key + 1KB shared. Well, on pros side it should be twice as fast as
101 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102 * was observed to run ~75% faster, closer to 100% for commercial
103 * compilers... Yet "4-bit" procedure is preferred, because it's
104 * believed to provide better security-performance balance and adequate
105 * all-round performance. "All-round" refers to things like:
107 * - shorter setup time effectively improves overall timing for
108 * handling short messages;
109 * - larger table allocation can become unbearable because of VM
110 * subsystem penalties (for example on Windows large enough free
111 * results in VM working set trimming, meaning that consequent
112 * malloc would immediately incur working set expansion);
113 * - larger table has larger cache footprint, which can affect
114 * performance of other code paths (not necessarily even from same
115 * thread in Hyper-Threading world);
117 * Value of 1 is not appropriate for performance reasons.
/*
 * gcm_init_8bit: populate the 256-entry table Htable.
 * NOTE(review): only part of the body is visible here; how V is loaded
 * (presumably from H) and the body of the first loop are not shown.
 */
121 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
/* Htable[128] is seeded with V; i then steps through 64, 32, ..., 1. */
131 for (Htable[128]=V, i=64; i>0; i>>=1) {
/* For every power of two i in 2..128, entries i+1 .. 2i-1 are formed as
 * Htable[i] XOR Htable[j] for j = 1 .. i-1. */
136 for (i=2; i<256; i<<=1) {
137 u128 *Hi = Htable+i, H0 = *Hi;
138 for (j=1; j<i; ++j) {
139 Hi[j].hi = H0.hi^Htable[j].hi;
140 Hi[j].lo = H0.lo^Htable[j].lo;
/*
 * gcm_gmult_8bit: update Xi in place using the 256-entry table Htable.
 *
 * Visible steps: xi starts at the last byte of Xi (offset 15); the
 * accumulator Z is XORed with Htable[n]; the loop exits once xi has reached
 * the first byte of Xi; otherwise Z is shifted right by 8 bits and the byte
 * shifted out (rem) selects a rem_8bit entry that is XORed into Z.hi.
 * Finally Z is written back to Xi.
 * NOTE(review): the lines that compute n and step xi are not visible here.
 */
145 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
148 const u8 *xi = (const u8 *)Xi+15;
/* is_endian.little is non-zero when the first byte of a long holding 1 is 1,
 * i.e. on a little-endian host. */
150 const union { long one; char little; } is_endian = {1};
/* 256 constants, each placed in the top 16 bits of a size_t by PACK(). */
152 static const size_t rem_8bit[256] = {
153 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
154 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
155 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
156 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
157 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
158 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
159 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
160 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
161 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
162 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
163 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
164 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
165 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
166 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
167 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
168 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
169 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
170 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
171 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
172 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
173 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
174 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
175 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
176 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
177 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
178 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
179 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
180 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
181 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
182 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
183 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
184 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
185 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
186 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
187 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
188 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
189 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
190 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
191 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
192 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
193 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
194 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
195 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
196 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
197 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
198 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
199 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
200 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
201 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
202 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
203 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
204 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
205 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
206 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
207 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
208 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
209 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
210 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
211 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
212 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
213 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
214 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
215 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
216 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
219 Z.hi ^= Htable[n].hi;
220 Z.lo ^= Htable[n].lo;
/* Stop once xi has walked back to the first byte of Xi. */
222 if ((u8 *)Xi==xi) break;
/* Shift Z right by one byte; rem holds the byte that falls off. */
226 rem = (size_t)Z.lo&0xff;
227 Z.lo = (Z.hi<<56)|(Z.lo>>8);
/* With a 64-bit size_t the PACKed entry already sits in the top 16 bits;
 * otherwise it is widened and shifted up by 32 to reach the same position. */
229 if (sizeof(size_t)==8)
230 Z.hi ^= rem_8bit[rem];
232 Z.hi ^= (u64)rem_8bit[rem]<<32;
/* Write Z back to Xi: byte-swapped 64-bit stores, or four PUTU32 stores. */
235 if (is_endian.little) {
237 Xi[0] = BSWAP8(Z.hi);
238 Xi[1] = BSWAP8(Z.lo);
242 v = (u32)(Z.hi>>32); PUTU32(p,v);
243 v = (u32)(Z.hi); PUTU32(p+4,v);
244 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
245 v = (u32)(Z.lo); PUTU32(p+12,v);
/* GCM_MUL for the 8-bit table variant: calls gcm_gmult_8bit on the context's
 * Xi field and Htable. The Xi macro argument names the field (ctx->Xi.u). */
253 #define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
/*
 * gcm_init_4bit: populate the 16-entry table Htable.
 *
 * Under OPENSSL_SMALL_FOOTPRINT the table is built with loops; the other
 * visible path assigns the non-power-of-two entries one by one as
 * V XOR Htable[k]. With GHASH_ASM on ARM, entries are additionally rewritten
 * into the dword order the assembler expects.
 * NOTE(review): the loads of V and the power-of-two entries are elided from
 * this excerpt; V presumably holds the current power-of-two entry.
 */
257 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
260 #if defined(OPENSSL_SMALL_FOOTPRINT)
269 #if defined(OPENSSL_SMALL_FOOTPRINT)
/* Htable[8] is seeded with V; i then steps through 4, 2, 1. */
270 for (Htable[8]=V, i=4; i>0; i>>=1) {
/* For i = 2, 4, 8: entries i+1 .. 2i-1 are Htable[i] XOR Htable[j]. */
275 for (i=2; i<16; i<<=1) {
278 for (V=*Hi, j=1; j<i; ++j) {
279 Hi[j].hi = V.hi^Htable[j].hi;
280 Hi[j].lo = V.lo^Htable[j].lo;
/* Unrolled form: each entry is V XOR a previously filled lower entry. */
291 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
293 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
294 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
295 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
297 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
298 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
299 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
300 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
301 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
302 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
303 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
305 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
307 * ARM assembler expects specific dword order in Htable.
311 const union { long one; char little; } is_endian = {1};
313 if (is_endian.little)
/* hi and lo trade places and each has its two 32-bit halves swapped. */
322 Htable[j].hi = V.lo<<32|V.lo>>32;
323 Htable[j].lo = V.hi<<32|V.hi>>32;
/* 16 constants, each placed in the top 16 bits of a size_t by PACK().
 * The 4-bit routines in this file index it with the nibble shifted out of
 * Z.lo (rem = Z.lo & 0xf) and XOR the entry into Z.hi. */
331 static const size_t rem_4bit[16] = {
332 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
333 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
334 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
335 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
/*
 * gcm_gmult_4bit: update Xi in place using the 16-entry table Htable.
 *
 * Visible steps: start from the last byte of Xi (index 15), load Z from
 * Htable[nlo], then repeatedly shift Z right by 4 bits, fold the nibble that
 * fell off back in through rem_4bit, and XOR in the next table entry
 * (Htable[nhi], then Htable[nlo] of the next byte Xi[cnt]).
 * The result Z is written back to Xi.
 * NOTE(review): the lines that split each byte into nhi/nlo and that step
 * cnt are elided from this excerpt.
 */
337 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
341 size_t rem, nlo, nhi;
/* is_endian.little is non-zero on a little-endian host. */
342 const union { long one; char little; } is_endian = {1};
344 nlo = ((const u8 *)Xi)[15];
348 Z.hi = Htable[nlo].hi;
349 Z.lo = Htable[nlo].lo;
/* Shift Z right by one nibble; rem is the nibble that falls off. */
352 rem = (size_t)Z.lo&0xf;
353 Z.lo = (Z.hi<<60)|(Z.lo>>4);
/* rem_4bit entries occupy the top 16 bits of a size_t; on a non-64-bit
 * size_t they are widened and shifted up by 32 to reach the top of Z.hi. */
355 if (sizeof(size_t)==8)
356 Z.hi ^= rem_4bit[rem];
358 Z.hi ^= (u64)rem_4bit[rem]<<32;
360 Z.hi ^= Htable[nhi].hi;
361 Z.lo ^= Htable[nhi].lo;
365 nlo = ((const u8 *)Xi)[cnt];
369 rem = (size_t)Z.lo&0xf;
370 Z.lo = (Z.hi<<60)|(Z.lo>>4);
372 if (sizeof(size_t)==8)
373 Z.hi ^= rem_4bit[rem];
375 Z.hi ^= (u64)rem_4bit[rem]<<32;
377 Z.hi ^= Htable[nlo].hi;
378 Z.lo ^= Htable[nlo].lo;
/* Write Z back to Xi: byte-swapped 64-bit stores, or four PUTU32 stores. */
381 if (is_endian.little) {
383 Xi[0] = BSWAP8(Z.hi);
384 Xi[1] = BSWAP8(Z.lo);
388 v = (u32)(Z.hi>>32); PUTU32(p,v);
389 v = (u32)(Z.hi); PUTU32(p+4,v);
390 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
391 v = (u32)(Z.lo); PUTU32(p+12,v);
400 #if !defined(OPENSSL_SMALL_FOOTPRINT)
402 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
403 * details... Compiler-generated code doesn't seem to give any
404 * performance improvement, at least not on x86[_64]. It's here
405 * mostly as reference and a placeholder for possible future
406 * non-trivial optimization[s]...
/*
 * gcm_ghash_4bit: fold len bytes at inp into Xi, 16 bytes per pass, using
 * the 16-entry table Htable. Compiled only without OPENSSL_SMALL_FOOTPRINT.
 *
 * Two bodies are visible. The first repeats the nibble-at-a-time steps of
 * gcm_gmult_4bit with rem_4bit. The second derives two helper tables from
 * Htable (Hshr4, Hshl4), processes a whole byte per step and folds the
 * shifted-out byte through a local 16-bit rem_8bit table.
 *
 * NOTE(review): the loop ends only when `len -= 16` yields exactly 0, so len
 * is assumed to be a non-zero multiple of 16. The GHASH() call sites visible
 * in this file pass GHASH_CHUNK; confirm the remaining callers.
 * NOTE(review): the lines that XOR inp into the working bytes and that
 * select between the two bodies are elided from this excerpt.
 */
408 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
409 const u8 *inp,size_t len)
413 size_t rem, nlo, nhi;
414 const union { long one; char little; } is_endian = {1};
419 nlo = ((const u8 *)Xi)[15];
424 Z.hi = Htable[nlo].hi;
425 Z.lo = Htable[nlo].lo;
/* Shift Z right by one nibble and fold the dropped nibble via rem_4bit. */
428 rem = (size_t)Z.lo&0xf;
429 Z.lo = (Z.hi<<60)|(Z.lo>>4);
431 if (sizeof(size_t)==8)
432 Z.hi ^= rem_4bit[rem];
434 Z.hi ^= (u64)rem_4bit[rem]<<32;
436 Z.hi ^= Htable[nhi].hi;
437 Z.lo ^= Htable[nhi].lo;
441 nlo = ((const u8 *)Xi)[cnt];
446 rem = (size_t)Z.lo&0xf;
447 Z.lo = (Z.hi<<60)|(Z.lo>>4);
449 if (sizeof(size_t)==8)
450 Z.hi ^= rem_4bit[rem];
452 Z.hi ^= (u64)rem_4bit[rem]<<32;
454 Z.hi ^= Htable[nlo].hi;
455 Z.lo ^= Htable[nlo].lo;
459 * Extra 256+16 bytes per-key plus 512 bytes shared tables
460 * [should] give ~50% improvement... One could have PACK()-ed
461 * the rem_8bit even here, but the priority is to minimize
464 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
465 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
467 static const unsigned short rem_8bit[256] = {
468 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
469 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
470 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
471 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
472 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
473 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
474 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
475 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
476 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
477 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
478 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
479 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
480 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
481 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
482 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
483 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
484 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
485 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
486 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
487 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
488 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
489 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
490 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
491 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
492 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
493 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
494 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
495 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
496 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
497 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
498 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
499 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
501 * This pre-processing phase slows down procedure by approximately
502 * same time as it makes each loop spin faster. In other words
503 * single block performance is approximately same as straightforward
504 * "4-bit" implementation, and then it goes only faster...
506 for (cnt=0; cnt<16; ++cnt) {
507 Z.hi = Htable[cnt].hi;
508 Z.lo = Htable[cnt].lo;
509 Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
510 Hshr4[cnt].hi = (Z.hi>>4);
511 Hshl4[cnt] = (u8)(Z.lo<<4);
515 for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
516 nlo = ((const u8 *)Xi)[cnt];
521 Z.hi ^= Htable[nlo].hi;
522 Z.lo ^= Htable[nlo].lo;
524 rem = (size_t)Z.lo&0xff;
526 Z.lo = (Z.hi<<56)|(Z.lo>>8);
529 Z.hi ^= Hshr4[nhi].hi;
530 Z.lo ^= Hshr4[nhi].lo;
531 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
534 nlo = ((const u8 *)Xi)[0];
539 Z.hi ^= Htable[nlo].hi;
540 Z.lo ^= Htable[nlo].lo;
542 rem = (size_t)Z.lo&0xf;
544 Z.lo = (Z.hi<<60)|(Z.lo>>4);
547 Z.hi ^= Htable[nhi].hi;
548 Z.lo ^= Htable[nhi].lo;
549 Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
552 if (is_endian.little) {
554 Xi[0] = BSWAP8(Z.hi);
555 Xi[1] = BSWAP8(Z.lo);
559 v = (u32)(Z.hi>>32); PUTU32(p,v);
560 v = (u32)(Z.hi); PUTU32(p+4,v);
561 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
562 v = (u32)(Z.lo); PUTU32(p+12,v);
569 } while (inp+=16, len-=16);
/* Prototypes for gcm_gmult_4bit / gcm_ghash_4bit without a body.
 * NOTE(review): presumably for the case where they are supplied by assembler
 * (the selecting preprocessor lines are not visible) - confirm. */
573 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
574 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
/* GCM_MUL for the 4-bit variant: one multiplication of ctx->Xi.u. */
577 #define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
/* GHASH (bulk hashing of in[0..len)) is only defined when GHASH_ASM is set
 * or OPENSSL_SMALL_FOOTPRINT is not. */
578 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
579 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
580 /* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
581 * thrashing effect. In other words idea is to hash data while it's
582 * still in L1 cache after encryption pass... */
583 #define GHASH_CHUNK (3*1024)
586 #else /* TABLE_BITS */
/*
 * gcm_gmult_1bit: table-free variant; updates Xi in place using H directly.
 *
 * Visible steps: Xi is read one long at a time, converted to a value X whose
 * most significant bit comes first, and then consumed bit by bit (X <<= 1
 * per step) with a mask M derived from X's top bit. The result Z is written
 * back to Xi.
 * NOTE(review): the statements that use M and update Z/V are elided from
 * this excerpt.
 */
588 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
593 const long *xi = (const long *)Xi;
594 const union { long one; char little; } is_endian = {1};
596 V.hi = H[0]; /* H is in host byte order, no byte swapping */
/* One iteration per long-sized word of the 16-byte block. */
599 for (j=0; j<16/sizeof(long); ++j) {
600 if (is_endian.little) {
601 if (sizeof(long)==8) {
603 X = (long)(BSWAP8(xi[j]));
605 const u8 *p = (const u8 *)(xi+j);
606 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
610 const u8 *p = (const u8 *)(xi+j);
/* NOTE(review): X appears to be a signed long (see the casts above); M then
 * relies on >> of a negative value replicating the sign bit, which C leaves
 * implementation-defined - confirm for the target compilers. */
617 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
618 u64 M = (u64)(X>>(8*sizeof(long)-1));
/* Write Z back to Xi: byte-swapped 64-bit stores, or four PUTU32 stores. */
626 if (is_endian.little) {
628 Xi[0] = BSWAP8(Z.hi);
629 Xi[1] = BSWAP8(Z.lo);
633 v = (u32)(Z.hi>>32); PUTU32(p,v);
634 v = (u32)(Z.hi); PUTU32(p+4,v);
635 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
636 v = (u32)(Z.lo); PUTU32(p+12,v);
/* GCM_MUL for the 1-bit variant: passes the raw H value (ctx->H.u) instead
 * of a table. */
644 #define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/* Declarations for externally provided GHASH routines, used only when
 * TABLE_BITS==4 and GHASH_ASM is defined. Each architecture section defines
 * a GHASH_ASM_* marker and GCM_FUNCREF_4BIT. */
648 #if TABLE_BITS==4 && defined(GHASH_ASM)
649 # if !defined(I386_ONLY) && \
650 (defined(__i386) || defined(__i386__) || \
651 defined(__x86_64) || defined(__x86_64__) || \
652 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
653 # define GHASH_ASM_X86_OR_64
654 # define GCM_FUNCREF_4BIT
/* Capability words tested in CRYPTO_gcm128_init. */
655 extern unsigned int OPENSSL_ia32cap_P[2];
657 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
658 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
659 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
/* 32-bit x86 only: MMX and plain-x86 variants of the 4-bit routines. */
661 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
662 # define GHASH_ASM_X86
663 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
664 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
666 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
667 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
669 # elif defined(__arm__) || defined(__arm)
670 # include "arm_arch.h"
672 # define GHASH_ASM_ARM
673 # define GCM_FUNCREF_4BIT
674 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
675 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
677 # elif defined(__sparc__) || defined(__sparc)
678 # include "sparc_arch.h"
679 # define GHASH_ASM_SPARC
680 # define GCM_FUNCREF_4BIT
681 extern unsigned int OPENSSL_sparcv9cap_P[];
682 void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
683 void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
/* With GCM_FUNCREF_4BIT, GCM_MUL and GHASH call through gcm_gmult_p and
 * gcm_ghash_p. Those names are not globals: each public function in this
 * file declares them as locals initialised from ctx->gmult / ctx->ghash. */
687 #ifdef GCM_FUNCREF_4BIT
689 # define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
692 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
/*
 * CRYPTO_gcm128_init: set up ctx for the given key and block cipher.
 *
 * Visible steps: the whole context is zeroed; `block` is applied in place to
 * ctx->H.c with `key`; on little-endian hosts H is converted to host byte
 * order; finally the multiplication table is built and, where function
 * pointers are used, ctx->gmult / ctx->ghash are chosen from the CPU
 * capability bits.
 * NOTE(review): the lines that store block/key into ctx and the
 * preprocessor lines that pick between the branches are elided from this
 * excerpt.
 */
696 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
698 const union { long one; char little; } is_endian = {1};
700 memset(ctx,0,sizeof(*ctx));
/* In-place call: input and output are both ctx->H.c. */
704 (*block)(ctx->H.c,ctx->H.c,key);
706 if (is_endian.little) {
707 /* H is stored in host byte order */
709 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
710 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
714 hi = (u64)GETU32(p) <<32|GETU32(p+4);
715 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
722 gcm_init_8bit(ctx->Htable,ctx->H.u);
/* x86/x86_64: use the carry-less-multiply routines when both tested
 * capability bits are set. */
724 # if defined(GHASH_ASM_X86_OR_64)
725 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
726 if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
727 OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
728 gcm_init_clmul(ctx->Htable,ctx->H.u);
729 ctx->gmult = gcm_gmult_clmul;
730 ctx->ghash = gcm_ghash_clmul;
/* Otherwise build the generic 4-bit table and pick the routine variant. */
734 gcm_init_4bit(ctx->Htable,ctx->H.u);
735 # if defined(GHASH_ASM_X86) /* x86 only */
736 # if defined(OPENSSL_IA32_SSE2)
737 if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */
739 if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
741 ctx->gmult = gcm_gmult_4bit_mmx;
742 ctx->ghash = gcm_ghash_4bit_mmx;
744 ctx->gmult = gcm_gmult_4bit_x86;
745 ctx->ghash = gcm_ghash_4bit_x86;
748 ctx->gmult = gcm_gmult_4bit;
749 ctx->ghash = gcm_ghash_4bit;
/* ARM: with NEON only the function pointers are set here; the
 * gcm_init_4bit call appears before the generic 4-bit assignments. */
751 # elif defined(GHASH_ASM_ARM)
752 if (OPENSSL_armcap_P & ARMV7_NEON) {
753 ctx->gmult = gcm_gmult_neon;
754 ctx->ghash = gcm_ghash_neon;
756 gcm_init_4bit(ctx->Htable,ctx->H.u);
757 ctx->gmult = gcm_gmult_4bit;
758 ctx->ghash = gcm_ghash_4bit;
/* SPARC: same shape, keyed on the VIS3 capability bit. */
760 # elif defined(GHASH_ASM_SPARC)
761 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
762 ctx->gmult = gcm_gmult_vis3;
763 ctx->ghash = gcm_ghash_vis3;
765 gcm_init_4bit(ctx->Htable,ctx->H.u);
766 ctx->gmult = gcm_gmult_4bit;
767 ctx->ghash = gcm_ghash_4bit;
770 gcm_init_4bit(ctx->Htable,ctx->H.u);
/*
 * CRYPTO_gcm128_setiv: start a new message with the given IV.
 *
 * Visible steps: both length counters are reset; the counter block ctx->Yi
 * is derived from iv, either by copying 12 bytes directly or by XORing iv
 * into Yi 16 bytes at a time plus a shorter tail and then XORing len0 into
 * bytes 8..15; ctx->EK0 is computed by applying the block cipher to Yi; the
 * 32-bit counter at Yi.c+12 is read and written back.
 * NOTE(review): the condition that selects the 12-byte path, the GCM_MUL
 * calls, the definition of len0 and the counter increment are elided here.
 */
775 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
777 const union { long one; char little; } is_endian = {1};
779 #ifdef GCM_FUNCREF_4BIT
780 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
787 ctx->len.u[0] = 0; /* AAD length */
788 ctx->len.u[1] = 0; /* message length */
793 memcpy(ctx->Yi.c,iv,12);
/* Full 16-byte piece of the IV. */
802 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
/* Remaining tail of fewer than a full block. */
808 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
/* XOR len0 into Yi bytes 8..15, most significant byte first. */
812 if (is_endian.little) {
814 ctx->Yi.u[1] ^= BSWAP8(len0);
816 ctx->Yi.c[8] ^= (u8)(len0>>56);
817 ctx->Yi.c[9] ^= (u8)(len0>>48);
818 ctx->Yi.c[10] ^= (u8)(len0>>40);
819 ctx->Yi.c[11] ^= (u8)(len0>>32);
820 ctx->Yi.c[12] ^= (u8)(len0>>24);
821 ctx->Yi.c[13] ^= (u8)(len0>>16);
822 ctx->Yi.c[14] ^= (u8)(len0>>8);
823 ctx->Yi.c[15] ^= (u8)(len0);
827 ctx->Yi.u[1] ^= len0;
831 if (is_endian.little)
832 ctr = GETU32(ctx->Yi.c+12);
/* EK0 = block cipher applied to the initial counter block. */
837 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
839 if (is_endian.little)
840 PUTU32(ctx->Yi.c+12,ctr);
/*
 * CRYPTO_gcm128_aad: feed additional authenticated data into the hash.
 *
 * Returns -2 if the message length counter ctx->len.u[1] is already
 * non-zero. The accumulated AAD length is range-checked and stored in
 * ctx->len.u[0]. Data is XORed into ctx->Xi and multiplied (GCM_MUL) each
 * time a 16-byte block is complete; a trailing partial block is XORed in and
 * its size kept in n.
 * NOTE(review): the statement executed when the length check fails, the
 * bulk GHASH call and the final return value are elided from this excerpt.
 */
845 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
849 u64 alen = ctx->len.u[0];
850 #ifdef GCM_FUNCREF_4BIT
851 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
853 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
854 const u8 *inp,size_t len) = ctx->ghash;
/* AAD may not be added once message data has been processed. */
858 if (ctx->len.u[1]) return -2;
/* Upper bound of 2^61 bytes, plus a wrap-around check on 64-bit size_t. */
861 if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
863 ctx->len.u[0] = alen;
/* Complete a partially filled block left from an earlier call. */
868 ctx->Xi.c[n] ^= *(aad++);
872 if (n==0) GCM_MUL(ctx,Xi);
/* i = len rounded down to a multiple of 16. */
880 if ((i = (len&(size_t)-16))) {
887 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
/* Trailing partial block: XOR it in and remember its length in n. */
894 n = (unsigned int)len;
895 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
/*
 * CRYPTO_gcm128_encrypt: encrypt `in` to `out` in counter mode and fold the
 * produced ciphertext into ctx->Xi.
 *
 * Visible structure: the running message length is range-checked and stored
 * in ctx->len.u[1]; the 32-bit counter is read from Yi.c+12; a leftover
 * partial block is finished byte by byte; then, unless
 * OPENSSL_SMALL_FOOTPRINT is defined, data is processed a size_t at a time
 * in GHASH_CHUNK-sized runs, then in whole 16-byte blocks, then a tail;
 * the final loop is a byte-at-a-time form.
 * Each keystream block is produced by (*block)(Yi, EKi, key) and the counter
 * is written back to Yi.c+12 afterwards.
 * NOTE(review): many statements (counter increments, branch targets, return
 * values, the trailing parameter list) are elided from this excerpt.
 */
902 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
903 const unsigned char *in, unsigned char *out,
906 const union { long one; char little; } is_endian = {1};
909 u64 mlen = ctx->len.u[1];
910 block128_f block = ctx->block;
911 void *key = ctx->key;
912 #ifdef GCM_FUNCREF_4BIT
913 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
915 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
916 const u8 *inp,size_t len) = ctx->ghash;
921 n = (unsigned int)mlen%16; /* alternative to ctx->mres */
/* Upper bound of 2^36-32 bytes, plus a wrap-around check on 64-bit size_t. */
924 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
926 ctx->len.u[1] = mlen;
929 /* First call to encrypt finalizes GHASH(AAD) */
934 if (is_endian.little)
935 ctr = GETU32(ctx->Yi.c+12);
940 #if !defined(OPENSSL_SMALL_FOOTPRINT)
941 if (16%sizeof(size_t) == 0) do { /* always true actually */
/* Finish a partial block: out = in ^ keystream, and Xi ^= out. */
944 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
948 if (n==0) GCM_MUL(ctx,Xi);
/* The word-wise paths below cast in/out to size_t*, so under
 * STRICT_ALIGNMENT they are tested for size_t alignment first. */
954 #if defined(STRICT_ALIGNMENT)
955 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
958 #if defined(GHASH) && defined(GHASH_CHUNK)
959 while (len>=GHASH_CHUNK) {
960 size_t j=GHASH_CHUNK;
963 (*block)(ctx->Yi.c,ctx->EKi.c,key);
965 if (is_endian.little)
966 PUTU32(ctx->Yi.c+12,ctr);
969 for (i=0; i<16; i+=sizeof(size_t))
971 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Hash the chunk of ciphertext that was just written. */
976 GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
979 if ((i = (len&(size_t)-16))) {
983 (*block)(ctx->Yi.c,ctx->EKi.c,key);
985 if (is_endian.little)
986 PUTU32(ctx->Yi.c+12,ctr);
989 for (i=0; i<16; i+=sizeof(size_t))
991 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1000 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1002 if (is_endian.little)
1003 PUTU32(ctx->Yi.c+12,ctr);
/* Encrypt one word and fold the ciphertext word into Xi in one expression. */
1006 for (i=0; i<16; i+=sizeof(size_t))
1007 *(size_t *)(ctx->Xi.c+i) ^=
1008 *(size_t *)(out+i) =
1009 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1017 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1019 if (is_endian.little)
1020 PUTU32(ctx->Yi.c+12,ctr);
1024 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
/* Byte-at-a-time form. */
1033 for (i=0;i<len;++i) {
1035 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1037 if (is_endian.little)
1038 PUTU32(ctx->Yi.c+12,ctr);
1042 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt: decrypt `in` to `out` in counter mode and fold the
 * received ciphertext into ctx->Xi.
 *
 * Same overall structure as the encrypt routine, with one visible
 * difference: the hash is taken over the input (ciphertext), so in the
 * chunked path GHASH is called on `in` before the chunk is decrypted, and in
 * the word/byte paths the input value c is saved before `out` is written.
 * NOTE(review): counter increments, branch targets, return values and the
 * trailing parameter list are elided from this excerpt.
 */
1052 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1053 const unsigned char *in, unsigned char *out,
1056 const union { long one; char little; } is_endian = {1};
1057 unsigned int n, ctr;
1059 u64 mlen = ctx->len.u[1];
1060 block128_f block = ctx->block;
1061 void *key = ctx->key;
1062 #ifdef GCM_FUNCREF_4BIT
1063 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1065 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1066 const u8 *inp,size_t len) = ctx->ghash;
/* Upper bound of 2^36-32 bytes, plus a wrap-around check on 64-bit size_t. */
1071 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1073 ctx->len.u[1] = mlen;
1076 /* First call to decrypt finalizes GHASH(AAD) */
1081 if (is_endian.little)
1082 ctr = GETU32(ctx->Yi.c+12);
1087 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1088 if (16%sizeof(size_t) == 0) do { /* always true actually */
1092 *(out++) = c^ctx->EKi.c[n];
1097 if (n==0) GCM_MUL (ctx,Xi);
/* The word-wise paths below cast in/out to size_t*, so under
 * STRICT_ALIGNMENT they are tested for size_t alignment first. */
1103 #if defined(STRICT_ALIGNMENT)
1104 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1107 #if defined(GHASH) && defined(GHASH_CHUNK)
1108 while (len>=GHASH_CHUNK) {
1109 size_t j=GHASH_CHUNK;
/* Hash the ciphertext chunk first, then decrypt it. */
1111 GHASH(ctx,in,GHASH_CHUNK);
1113 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1115 if (is_endian.little)
1116 PUTU32(ctx->Yi.c+12,ctr);
1119 for (i=0; i<16; i+=sizeof(size_t))
1120 *(size_t *)(out+i) =
1121 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1128 if ((i = (len&(size_t)-16))) {
1131 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1133 if (is_endian.little)
1134 PUTU32(ctx->Yi.c+12,ctr);
1137 for (i=0; i<16; i+=sizeof(size_t))
1138 *(size_t *)(out+i) =
1139 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1147 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1149 if (is_endian.little)
1150 PUTU32(ctx->Yi.c+12,ctr);
/* c is read before out is written, then folded into Xi. */
1153 for (i=0; i<16; i+=sizeof(size_t)) {
1154 size_t c = *(size_t *)(in+i);
1155 *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
1156 *(size_t *)(ctx->Xi.c+i) ^= c;
1165 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1167 if (is_endian.little)
1168 PUTU32(ctx->Yi.c+12,ctr);
1174 out[n] = c^ctx->EKi.c[n];
/* Byte-at-a-time form. */
1183 for (i=0;i<len;++i) {
1186 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1188 if (is_endian.little)
1189 PUTU32(ctx->Yi.c+12,ctr);
1194 out[i] = c^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_encrypt_ctr32: like the encrypt routine, but whole blocks
 * are encrypted by the caller-supplied `stream` function.
 *
 * Visible structure: length check and counter load as in the encrypt
 * routine; a leftover partial block is finished byte by byte; full
 * GHASH_CHUNK runs and then the remaining whole blocks are handed to
 * (*stream)(in, out, <block count>, key, ctx->Yi.c), after which ctr is
 * advanced by the block count and written back to Yi.c+12; the ciphertext
 * is then hashed; a final partial block uses (*ctx->block) directly.
 * NOTE(review): return values and several control-flow lines are elided
 * from this excerpt.
 */
1205 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1206 const unsigned char *in, unsigned char *out,
1207 size_t len, ctr128_f stream)
1209 const union { long one; char little; } is_endian = {1};
1210 unsigned int n, ctr;
1212 u64 mlen = ctx->len.u[1];
1213 void *key = ctx->key;
1214 #ifdef GCM_FUNCREF_4BIT
1215 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1217 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1218 const u8 *inp,size_t len) = ctx->ghash;
/* Upper bound of 2^36-32 bytes, plus a wrap-around check on 64-bit size_t. */
1223 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1225 ctx->len.u[1] = mlen;
1228 /* First call to encrypt finalizes GHASH(AAD) */
1233 if (is_endian.little)
1234 ctr = GETU32(ctx->Yi.c+12);
/* Finish a partial block: out = in ^ keystream, and Xi ^= out. */
1241 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1245 if (n==0) GCM_MUL(ctx,Xi);
1251 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1252 while (len>=GHASH_CHUNK) {
/* Encrypt GHASH_CHUNK/16 blocks, advance the counter, then hash the output. */
1253 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1254 ctr += GHASH_CHUNK/16;
1255 if (is_endian.little)
1256 PUTU32(ctx->Yi.c+12,ctr);
1259 GHASH(ctx,out,GHASH_CHUNK);
1265 if ((i = (len&(size_t)-16))) {
1268 (*stream)(in,out,j,key,ctx->Yi.c);
1269 ctr += (unsigned int)j;
1270 if (is_endian.little)
1271 PUTU32(ctx->Yi.c+12,ctr);
1281 for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
/* Final partial block goes through the single-block cipher. */
1288 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1290 if (is_endian.little)
1291 PUTU32(ctx->Yi.c+12,ctr);
1295 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt_ctr32: like the decrypt routine, but whole blocks
 * are decrypted by the caller-supplied `stream` function.
 *
 * Visible structure: mirrors the ctr32 encrypt routine, except that the
 * hash is taken over the input: in the chunked path GHASH(ctx, in, ...) is
 * called before (*stream), and in the block path `in` is XORed into Xi
 * before the stream call.
 * NOTE(review): return values and several control-flow lines are elided
 * from this excerpt.
 */
1304 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1305 const unsigned char *in, unsigned char *out,
1306 size_t len,ctr128_f stream)
1308 const union { long one; char little; } is_endian = {1};
1309 unsigned int n, ctr;
1311 u64 mlen = ctx->len.u[1];
1312 void *key = ctx->key;
1313 #ifdef GCM_FUNCREF_4BIT
1314 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1316 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1317 const u8 *inp,size_t len) = ctx->ghash;
/* Upper bound of 2^36-32 bytes, plus a wrap-around check on 64-bit size_t. */
1322 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1324 ctx->len.u[1] = mlen;
1327 /* First call to decrypt finalizes GHASH(AAD) */
1332 if (is_endian.little)
1333 ctr = GETU32(ctx->Yi.c+12);
1341 *(out++) = c^ctx->EKi.c[n];
1346 if (n==0) GCM_MUL (ctx,Xi);
1352 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1353 while (len>=GHASH_CHUNK) {
/* Hash the ciphertext chunk first, then decrypt it and advance the counter. */
1354 GHASH(ctx,in,GHASH_CHUNK);
1355 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1356 ctr += GHASH_CHUNK/16;
1357 if (is_endian.little)
1358 PUTU32(ctx->Yi.c+12,ctr);
1366 if ((i = (len&(size_t)-16))) {
1374 for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1381 (*stream)(in,out,j,key,ctx->Yi.c);
1382 ctr += (unsigned int)j;
1383 if (is_endian.little)
1384 PUTU32(ctx->Yi.c+12,ctr);
/* Final partial block goes through the single-block cipher. */
1392 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1394 if (is_endian.little)
1395 PUTU32(ctx->Yi.c+12,ctr);
1401 out[n] = c^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_finish: complete the hash and, if a tag is supplied,
 * compare it.
 *
 * Visible steps: the AAD and message byte counts are converted to bit counts
 * (<<3), brought into big-endian layout on little-endian hosts, and XORed
 * into ctx->Xi; ctx->EK0 is then XORed into ctx->Xi, leaving the computed
 * tag in ctx->Xi. If `tag` is non-NULL and len <= sizeof(ctx->Xi), the
 * result of memcmp(ctx->Xi.c, tag, len) is returned (0 on match).
 * NOTE(review): the GCM_MUL calls, the statement guarded by
 * `ctx->mres || ctx->ares`, and the return value of the other branch are
 * elided from this excerpt.
 * NOTE(review): memcmp is not constant-time; comparing an authentication
 * tag with it can leak how many leading bytes matched. Consider a
 * constant-time comparison such as CRYPTO_memcmp.
 */
1410 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1413 const union { long one; char little; } is_endian = {1};
/* Lengths in bits. */
1414 u64 alen = ctx->len.u[0]<<3;
1415 u64 clen = ctx->len.u[1]<<3;
1416 #ifdef GCM_FUNCREF_4BIT
1417 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1420 if (ctx->mres || ctx->ares)
1423 if (is_endian.little) {
1425 alen = BSWAP8(alen);
1426 clen = BSWAP8(clen);
/* Variant without BSWAP8: store the lengths, then re-read them as
 * big-endian through GETU32. */
1430 ctx->len.u[0] = alen;
1431 ctx->len.u[1] = clen;
1433 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1434 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1438 ctx->Xi.u[0] ^= alen;
1439 ctx->Xi.u[1] ^= clen;
/* Mask the hash with EK0 to obtain the tag. */
1442 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1443 ctx->Xi.u[1] ^= ctx->EK0.u[1];
1445 if (tag && len<=sizeof(ctx->Xi))
1446 return memcmp(ctx->Xi.c,tag,len);
/*
 * CRYPTO_gcm128_tag: finalize the computation and copy the tag out.
 *
 * Calls CRYPTO_gcm128_finish with no tag to compare (NULL, 0), then copies
 * the first min(len, sizeof(ctx->Xi.c)) bytes of ctx->Xi.c into `tag`.
 */
1451 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1453 CRYPTO_gcm128_finish(ctx, NULL, 0);
/* The copy is capped at the size of Xi, so a larger len is truncated. */
1454 memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
/*
 * CRYPTO_gcm128_new: allocate a GCM128_CONTEXT with OPENSSL_malloc and, if
 * the allocation succeeded, initialise it with CRYPTO_gcm128_init.
 * NOTE(review): the return statement is elided from this excerpt;
 * presumably `ret` (NULL on allocation failure) is returned.
 */
1457 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1459 GCM128_CONTEXT *ret;
1461 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1462 CRYPTO_gcm128_init(ret,key,block);
/*
 * CRYPTO_gcm128_release: passes the entire context structure
 * (sizeof(*ctx) bytes) to OPENSSL_cleanse().  Any NULL check or
 * deallocation around this call is not visible in this excerpt.
 */
1467 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1470 OPENSSL_cleanse(ctx,sizeof(*ctx));
1475 #if defined(SELFTEST)
1477 #include <openssl/aes.h>
/* Test case 1: K1 is a 16-byte (128-bit) key with no initializer, hence
 * all zero; T1 is the expected 16-byte tag.  The remaining case-1 names
 * used by TEST_CASE (P1, A1, IV1, C1) are not visible in this excerpt. */
1480 static const u8 K1[16],
1485 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
/* Test case 2: P2 is a 16-byte all-zero plaintext (no initializer), C2 the
 * expected 16-byte ciphertext and T2 the expected 16-byte tag.  Key, IV
 * and AAD names for this case are not visible in this excerpt. */
1491 static const u8 P2[16],
1492 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1493 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
/* Test case 3: 16-byte (128-bit) key K3, 64-byte plaintext P3, 12-byte IV
 * IV3, expected 64-byte ciphertext C3 and expected 16-byte tag T3. */
1497 static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1498 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1499 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1500 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1501 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1502 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1503 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1504 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1505 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1506 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1507 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
/* Test case 4: 60-byte plaintext P4 (not a multiple of the 16-byte block),
 * 20-byte additional authenticated data A4, expected 60-byte ciphertext C4
 * and expected 16-byte tag T4.  K4 and IV4 are not defined in the visible
 * lines; presumably aliased elsewhere -- TODO confirm. */
1512 static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1513 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1514 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1515 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1516 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1517 0xab,0xad,0xda,0xd2},
1518 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1519 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1520 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1521 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1522 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
/* Test case 5: 8-byte IV IV5 (shorter than the 12-byte IVs used in other
 * cases), expected 60-byte ciphertext C5 and expected 16-byte tag T5.
 * Key, plaintext and AAD names for this case are not visible here. */
1528 static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1529 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1530 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1531 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1532 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1533 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
/* Test case 6: 60-byte IV IV6 (longer than the 12-byte IVs used in other
 * cases), expected 60-byte ciphertext C6 and expected 16-byte tag T6.
 * Key, plaintext and AAD names for this case are not visible here. */
1539 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1540 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1541 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1542 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1543 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1544 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1545 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1546 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1547 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
/* Test case 7: K7 is a 24-byte (192-bit) key with no initializer, hence
 * all zero; T7 is the expected 16-byte tag.  The remaining case-7 names
 * used by TEST_CASE are not visible in this excerpt. */
1550 static const u8 K7[24],
1555 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
/* Test case 8: P8 is a 16-byte all-zero plaintext (no initializer), C8 the
 * expected 16-byte ciphertext and T8 the expected 16-byte tag.  Key, IV
 * and AAD names for this case are not visible in this excerpt. */
1561 static const u8 P8[16],
1562 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1563 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
/* Test case 9: 24-byte (192-bit) key K9, 64-byte plaintext P9, 12-byte IV
 * IV9, expected 64-byte ciphertext C9 and expected 16-byte tag T9. */
1567 static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1568 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1569 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1570 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1571 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1572 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1573 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1574 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1575 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1576 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1577 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1578 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
/* Test case 10: 60-byte plaintext P10, 20-byte additional authenticated
 * data A10, expected 60-byte ciphertext C10 and expected 16-byte tag T10.
 * K10 and IV10 are not defined in the visible lines; presumably aliased
 * elsewhere -- TODO confirm. */
1583 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1584 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1585 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1586 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1587 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1588 0xab,0xad,0xda,0xd2},
1589 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1590 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1591 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1592 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1593 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
/* Test case 11: 8-byte IV IV11, expected 60-byte ciphertext C11 and
 * expected 16-byte tag T11.  Key, plaintext and AAD names for this case
 * are not visible in this excerpt. */
1599 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1600 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1601 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1602 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1603 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1604 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
/* Test case 12: 60-byte IV IV12, expected 60-byte ciphertext C12 and
 * expected 16-byte tag T12.  Key, plaintext and AAD names for this case
 * are not visible in this excerpt. */
1610 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1611 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1612 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1613 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1614 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1615 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1616 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1617 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1618 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
/* Test case 13: K13 is a 32-byte (256-bit) key with no initializer, hence
 * all zero; T13 is the expected 16-byte tag.  The remaining case-13 names
 * used by TEST_CASE are not visible in this excerpt. */
1621 static const u8 K13[32],
1626 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
/* Test case 14: P14 is a 16-byte all-zero plaintext (no initializer), C14
 * the expected 16-byte ciphertext and T14 the expected 16-byte tag.  Key,
 * IV and AAD names for this case are not visible in this excerpt. */
1631 static const u8 P14[16],
1633 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1634 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
/* Test case 15: 32-byte (256-bit) key K15, 64-byte plaintext P15, 12-byte
 * IV IV15, expected 64-byte ciphertext C15 and expected 16-byte tag T15. */
1638 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1639 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1640 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1641 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1642 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1643 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1644 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1645 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1646 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1647 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1648 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1649 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
/* Test case 16: 60-byte plaintext P16, 20-byte additional authenticated
 * data A16, expected 60-byte ciphertext C16 and expected 16-byte tag T16.
 * K16 and IV16 are not defined in the visible lines; presumably aliased
 * elsewhere -- TODO confirm. */
1654 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1655 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1656 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1657 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1658 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1659 0xab,0xad,0xda,0xd2},
1660 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1661 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1662 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1663 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1664 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
/* Test case 17: 8-byte IV IV17, expected 60-byte ciphertext C17 and
 * expected 16-byte tag T17.  Key, plaintext and AAD names for this case
 * are not visible in this excerpt. */
1670 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1671 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1672 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1673 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1674 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1675 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
/* Test case 18: 60-byte IV IV18, expected 60-byte ciphertext C18 and
 * expected 16-byte tag T18.  Key, plaintext and AAD names for this case
 * are not visible in this excerpt. */
1681 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1682 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1683 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1684 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1685 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1686 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1687 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1688 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1689 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
/*
 * TEST_CASE(n): runs known-answer test number n in both directions, using
 * the vectors K<n> (key), IV<n>, A<n> (AAD), P<n> (plaintext), C<n>
 * (ciphertext) and T<n> (tag).  It relies on `key`, `ctx` and `ret`
 * existing in the enclosing scope; it does not declare them.
 *
 * Encrypt pass: expands the AES key (bit length = sizeof(K<n>)*8),
 * initializes the GCM context, sets the IV, feeds the AAD if A<n> is
 * non-NULL, encrypts P<n> into a zeroed scratch buffer if P<n> is non-NULL,
 * then checks the 16-byte tag against T<n> and the buffer against C<n>.
 * Decrypt pass: resets the IV on the same context, repeats with C<n> as
 * input, and checks the tag and the recovered plaintext against P<n>.
 *
 * Each failing pass increments ret and prints a message.  The scratch
 * buffer is sized by sizeof(P<n>).  The closing line of the do-block is
 * not visible in this excerpt.
 */
1691 #define TEST_CASE(n) do { \
1692 u8 out[sizeof(P##n)]; \
1693 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
1694 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
1695 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1696 memset(out,0,sizeof(out)); \
1697 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1698 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
1699 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1700 (C##n && memcmp(out,C##n,sizeof(out)))) \
1701 ret++, printf ("encrypt test#%d failed.\n",n); \
1702 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1703 memset(out,0,sizeof(out)); \
1704 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1705 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
1706 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1707 (P##n && memcmp(out,P##n,sizeof(out)))) \
1708 ret++, printf ("decrypt test#%d failed.\n",n); \
/*
 * Timing section of the self-test, guarded by OPENSSL_CPUID_OBJ.  The
 * enclosing function header and the declarations of ctx, key and i are
 * not visible in this excerpt.  Times are differences of OPENSSL_rdtsc()
 * readings and are printed divided by sizeof(buf), i.e. per byte processed.
 */
1736 #ifdef OPENSSL_CPUID_OBJ
/* OPENSSL_rdtsc is declared here as a function returning size_t; `stop`
 * is declared but not used in the visible lines. */
1738 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
/* The u64 member aligns the byte buffer; buf is not initialized in the
 * visible lines. */
1739 union { u64 u; u8 c[1024]; } buf;
/* Key and context setup, reusing the case-1 key and IV. */
1742 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1743 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1744 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
/* GCM encryption in place: one untimed warm-up call, then one timed call. */
1746 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1747 start = OPENSSL_rdtsc();
1748 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1749 gcm_t = OPENSSL_rdtsc() - start;
/* Plain CTR encryption with the same key and context state: one untimed
 * warm-up call, then one timed call. */
1751 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1752 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1753 (block128_f)AES_encrypt);
1754 start = OPENSSL_rdtsc();
1755 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1756 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1757 (block128_f)AES_encrypt);
1758 ctr_t = OPENSSL_rdtsc() - start;
/* Prints GCM per byte, CTR per byte, and their difference.
 * NOTE(review): gcm_t-ctr_t is size_t arithmetic and wraps to a huge value
 * if ctr_t happens to exceed gcm_t. */
1760 printf("%.2f-%.2f=%.2f\n",
1761 gcm_t/(double)sizeof(buf),
1762 ctr_t/(double)sizeof(buf),
1763 (gcm_t-ctr_t)/(double)sizeof(buf));
/* GHASH alone: local function pointer taken from ctx.ghash, one untimed
 * warm-up call, then 100 timed calls; prints the per-byte, per-call figure. */
1766 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1767 const u8 *inp,size_t len) = ctx.ghash;
1769 GHASH((&ctx),buf.c,sizeof(buf));
1770 start = OPENSSL_rdtsc();
1771 for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
1772 gcm_t = OPENSSL_rdtsc() - start;
1773 printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);