1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
50 #define OPENSSL_FIPSAPI
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
63 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64 /* redefine, because alignment is ensured */
66 #define GETU32(p) BSWAP4(*(const u32 *)(p))
68 #define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
/* PACK(s): cast s to size_t and shift it left by sizeof(size_t)*8-16 bits,
 * so that a 16-bit value ends up in the 16 most significant bits of a
 * size_t. Used to build the rem_4bit/rem_8bit size_t tables. */
71 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
72 #define REDUCE1BIT(V) do { \
73 if (sizeof(size_t)==8) { \
74 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75 V.lo = (V.hi<<63)|(V.lo>>1); \
76 V.hi = (V.hi>>1 )^T; \
79 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80 V.lo = (V.hi<<63)|(V.lo>>1); \
81 V.hi = (V.hi>>1 )^((u64)T<<32); \
86 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87 * never be set to 8. 8 is effectively reserved for testing purposes.
88 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90 * whole spectrum of possible table driven implementations. Why? In
91 * non-"Shoup's" case memory access pattern is segmented in such manner,
92 * that it's trivial to see that cache timing information can reveal
93 * fair portion of intermediate hash value. Given that ciphertext is
94 * always available to an attacker, it's possible for them to attempt to
95 * deduce the secret parameter H and, if successful, tamper with messages
96 * [which is trivial in CTR mode]. In "Shoup's" case it's
97 * not as trivial, but there is no reason to believe that it's resistant
98 * to cache-timing attack. And the thing about "8-bit" implementation is
99 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100 * key + 1KB shared. On the plus side, it should be twice as fast as
101 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102 * was observed to run ~75% faster, closer to 100% for commercial
103 * compilers... Yet "4-bit" procedure is preferred, because it's
104 * believed to provide better security-performance balance and adequate
105 * all-round performance. "All-round" refers to things like:
107 * - shorter setup time effectively improves overall timing for
108 * handling short messages;
109 * - larger table allocation can become unbearable because of VM
110 * subsystem penalties (for example on Windows a large enough free
111 * results in VM working set trimming, meaning that a subsequent
112 * malloc would immediately incur working set expansion);
113 * - larger table has larger cache footprint, which can affect
114 * performance of other code paths (not necessarily even from same
115 * thread in Hyper-Threading world);
117 * Value of 1 is not appropriate for performance reasons.
121 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
131 for (Htable[128]=V, i=64; i>0; i>>=1) {
136 for (i=2; i<256; i<<=1) {
137 u128 *Hi = Htable+i, H0 = *Hi;
138 for (j=1; j<i; ++j) {
139 Hi[j].hi = H0.hi^Htable[j].hi;
140 Hi[j].lo = H0.lo^Htable[j].lo;
145 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
148 const u8 *xi = (const u8 *)Xi+15;
150 const union { long one; char little; } is_endian = {1};
152 static const size_t rem_8bit[256] = {
153 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
154 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
155 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
156 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
157 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
158 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
159 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
160 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
161 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
162 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
163 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
164 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
165 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
166 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
167 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
168 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
169 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
170 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
171 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
172 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
173 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
174 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
175 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
176 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
177 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
178 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
179 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
180 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
181 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
182 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
183 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
184 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
185 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
186 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
187 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
188 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
189 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
190 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
191 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
192 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
193 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
194 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
195 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
196 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
197 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
198 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
199 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
200 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
201 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
202 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
203 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
204 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
205 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
206 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
207 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
208 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
209 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
210 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
211 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
212 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
213 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
214 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
215 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
216 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
219 Z.hi ^= Htable[n].hi;
220 Z.lo ^= Htable[n].lo;
222 if ((u8 *)Xi==xi) break;
226 rem = (size_t)Z.lo&0xff;
227 Z.lo = (Z.hi<<56)|(Z.lo>>8);
229 if (sizeof(size_t)==8)
230 Z.hi ^= rem_8bit[rem];
232 Z.hi ^= (u64)rem_8bit[rem]<<32;
235 if (is_endian.little) {
237 Xi[0] = BSWAP8(Z.hi);
238 Xi[1] = BSWAP8(Z.lo);
242 v = (u32)(Z.hi>>32); PUTU32(p,v);
243 v = (u32)(Z.hi); PUTU32(p+4,v);
244 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
245 v = (u32)(Z.lo); PUTU32(p+12,v);
253 #define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
257 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
260 #if defined(OPENSSL_SMALL_FOOTPRINT)
269 #if defined(OPENSSL_SMALL_FOOTPRINT)
270 for (Htable[8]=V, i=4; i>0; i>>=1) {
275 for (i=2; i<16; i<<=1) {
278 for (V=*Hi, j=1; j<i; ++j) {
279 Hi[j].hi = V.hi^Htable[j].hi;
280 Hi[j].lo = V.lo^Htable[j].lo;
291 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
293 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
294 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
295 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
297 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
298 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
299 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
300 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
301 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
302 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
303 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
305 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
307 * ARM assembler expects specific dword order in Htable.
311 const union { long one; char little; } is_endian = {1};
313 if (is_endian.little)
322 Htable[j].hi = V.lo<<32|V.lo>>32;
323 Htable[j].lo = V.hi<<32|V.hi>>32;
/*
 * rem_4bit: 16 constants, each moved into the top 16 bits of a size_t by
 * PACK(). The 4-bit routines that follow index it with the low nibble of
 * Z.lo (rem = Z.lo & 0xf), taken just before Z is shifted right by 4 bits,
 * and XOR the entry into Z.hi: directly when sizeof(size_t)==8, otherwise
 * widened to u64 and shifted left by a further 32 bits.
 */
331 static const size_t rem_4bit[16] = {
332 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
333 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
334 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
335 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
337 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
341 size_t rem, nlo, nhi;
342 const union { long one; char little; } is_endian = {1};
344 nlo = ((const u8 *)Xi)[15];
348 Z.hi = Htable[nlo].hi;
349 Z.lo = Htable[nlo].lo;
352 rem = (size_t)Z.lo&0xf;
353 Z.lo = (Z.hi<<60)|(Z.lo>>4);
355 if (sizeof(size_t)==8)
356 Z.hi ^= rem_4bit[rem];
358 Z.hi ^= (u64)rem_4bit[rem]<<32;
360 Z.hi ^= Htable[nhi].hi;
361 Z.lo ^= Htable[nhi].lo;
365 nlo = ((const u8 *)Xi)[cnt];
369 rem = (size_t)Z.lo&0xf;
370 Z.lo = (Z.hi<<60)|(Z.lo>>4);
372 if (sizeof(size_t)==8)
373 Z.hi ^= rem_4bit[rem];
375 Z.hi ^= (u64)rem_4bit[rem]<<32;
377 Z.hi ^= Htable[nlo].hi;
378 Z.lo ^= Htable[nlo].lo;
381 if (is_endian.little) {
383 Xi[0] = BSWAP8(Z.hi);
384 Xi[1] = BSWAP8(Z.lo);
388 v = (u32)(Z.hi>>32); PUTU32(p,v);
389 v = (u32)(Z.hi); PUTU32(p+4,v);
390 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
391 v = (u32)(Z.lo); PUTU32(p+12,v);
400 #if !defined(OPENSSL_SMALL_FOOTPRINT)
402 * Streamed gcm_gmult_4bit, see CRYPTO_gcm128_[en|de]crypt for
403 * details... Compiler-generated code doesn't seem to give any
404 * performance improvement, at least not on x86[_64]. It's here
405 * mostly as a reference and a placeholder for possible future
406 * non-trivial optimization[s]...
408 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
409 const u8 *inp,size_t len)
413 size_t rem, nlo, nhi;
414 const union { long one; char little; } is_endian = {1};
419 nlo = ((const u8 *)Xi)[15];
424 Z.hi = Htable[nlo].hi;
425 Z.lo = Htable[nlo].lo;
428 rem = (size_t)Z.lo&0xf;
429 Z.lo = (Z.hi<<60)|(Z.lo>>4);
431 if (sizeof(size_t)==8)
432 Z.hi ^= rem_4bit[rem];
434 Z.hi ^= (u64)rem_4bit[rem]<<32;
436 Z.hi ^= Htable[nhi].hi;
437 Z.lo ^= Htable[nhi].lo;
441 nlo = ((const u8 *)Xi)[cnt];
446 rem = (size_t)Z.lo&0xf;
447 Z.lo = (Z.hi<<60)|(Z.lo>>4);
449 if (sizeof(size_t)==8)
450 Z.hi ^= rem_4bit[rem];
452 Z.hi ^= (u64)rem_4bit[rem]<<32;
454 Z.hi ^= Htable[nlo].hi;
455 Z.lo ^= Htable[nlo].lo;
459 * Extra 256+16 bytes per-key plus 512 bytes shared tables
460 * [should] give ~50% improvement... One could have PACK()-ed
461 * the rem_8bit even here, but the priority is to minimize
464 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
465 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
/*
 * rem_8bit: 256 16-bit constants kept as plain unsigned short (not
 * PACK()-ed). In the code that follows, an entry is widened to u64 and
 * shifted left by 48 bits before being XORed into Z.hi; the index is
 * rem^Hshl4[nhi] inside the per-byte loop and rem<<4 (with rem = Z.lo&0xf)
 * in the step after that loop.
 */
467 static const unsigned short rem_8bit[256] = {
468 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
469 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
470 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
471 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
472 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
473 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
474 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
475 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
476 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
477 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
478 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
479 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
480 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
481 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
482 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
483 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
484 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
485 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
486 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
487 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
488 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
489 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
490 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
491 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
492 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
493 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
494 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
495 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
496 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
497 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
498 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
499 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
501 * This pre-processing phase slows the procedure down by approximately
502 * the same amount of time as it makes each loop spin faster. In other words,
503 * single-block performance is approximately the same as the straightforward
504 * "4-bit" implementation, and from then on it only gets faster...
506 for (cnt=0; cnt<16; ++cnt) {
507 Z.hi = Htable[cnt].hi;
508 Z.lo = Htable[cnt].lo;
509 Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
510 Hshr4[cnt].hi = (Z.hi>>4);
511 Hshl4[cnt] = (u8)(Z.lo<<4);
515 for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
516 nlo = ((const u8 *)Xi)[cnt];
521 Z.hi ^= Htable[nlo].hi;
522 Z.lo ^= Htable[nlo].lo;
524 rem = (size_t)Z.lo&0xff;
526 Z.lo = (Z.hi<<56)|(Z.lo>>8);
529 Z.hi ^= Hshr4[nhi].hi;
530 Z.lo ^= Hshr4[nhi].lo;
531 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
534 nlo = ((const u8 *)Xi)[0];
539 Z.hi ^= Htable[nlo].hi;
540 Z.lo ^= Htable[nlo].lo;
542 rem = (size_t)Z.lo&0xf;
544 Z.lo = (Z.hi<<60)|(Z.lo>>4);
547 Z.hi ^= Htable[nhi].hi;
548 Z.lo ^= Htable[nhi].lo;
549 Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
552 if (is_endian.little) {
554 Xi[0] = BSWAP8(Z.hi);
555 Xi[1] = BSWAP8(Z.lo);
559 v = (u32)(Z.hi>>32); PUTU32(p,v);
560 v = (u32)(Z.hi); PUTU32(p+4,v);
561 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
562 v = (u32)(Z.lo); PUTU32(p+12,v);
569 } while (inp+=16, len-=16);
573 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
574 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
577 #define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
578 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
579 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
580 /* GHASH_CHUNK is a "stride parameter" meant to mitigate the cache
581 * thrashing effect. In other words, the idea is to hash data while it's
582 * still in L1 cache after the encryption pass... */
583 #define GHASH_CHUNK (3*1024)
586 #else /* TABLE_BITS */
588 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
593 const long *xi = (const long *)Xi;
594 const union { long one; char little; } is_endian = {1};
596 V.hi = H[0]; /* H is in host byte order, no byte swapping */
599 for (j=0; j<16/sizeof(long); ++j) {
600 if (is_endian.little) {
601 if (sizeof(long)==8) {
603 X = (long)(BSWAP8(xi[j]));
605 const u8 *p = (const u8 *)(xi+j);
606 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
610 const u8 *p = (const u8 *)(xi+j);
617 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
618 u64 M = (u64)(X>>(8*sizeof(long)-1));
626 if (is_endian.little) {
628 Xi[0] = BSWAP8(Z.hi);
629 Xi[1] = BSWAP8(Z.lo);
633 v = (u32)(Z.hi>>32); PUTU32(p,v);
634 v = (u32)(Z.hi); PUTU32(p+4,v);
635 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
636 v = (u32)(Z.lo); PUTU32(p+12,v);
644 #define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
648 #if TABLE_BITS==4 && defined(GHASH_ASM)
649 # if !defined(I386_ONLY) && \
650 (defined(__i386) || defined(__i386__) || \
651 defined(__x86_64) || defined(__x86_64__) || \
652 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
653 # define GHASH_ASM_X86_OR_64
654 # define GCM_FUNCREF_4BIT
655 extern unsigned int OPENSSL_ia32cap_P[2];
657 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
658 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
659 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
661 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
662 # define gcm_init_avx gcm_init_clmul
663 # define gcm_gmult_avx gcm_gmult_clmul
664 # define gcm_ghash_avx gcm_ghash_clmul
666 void gcm_init_avx(u128 Htable[16],const u64 Xi[2]);
667 void gcm_gmult_avx(u64 Xi[2],const u128 Htable[16]);
668 void gcm_ghash_avx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
671 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
672 # define GHASH_ASM_X86
673 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
674 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
676 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
677 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
679 # elif defined(__arm__) || defined(__arm)
680 # include "arm_arch.h"
682 # define GHASH_ASM_ARM
683 # define GCM_FUNCREF_4BIT
684 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
685 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
687 # elif defined(__sparc__) || defined(__sparc)
688 # include "sparc_arch.h"
689 # define GHASH_ASM_SPARC
690 # define GCM_FUNCREF_4BIT
691 extern unsigned int OPENSSL_sparcv9cap_P[];
692 void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
693 void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
694 void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
698 #ifdef GCM_FUNCREF_4BIT
700 # define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
703 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
707 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
709 const union { long one; char little; } is_endian = {1};
711 memset(ctx,0,sizeof(*ctx));
715 (*block)(ctx->H.c,ctx->H.c,key);
717 if (is_endian.little) {
718 /* H is stored in host byte order */
720 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
721 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
725 hi = (u64)GETU32(p) <<32|GETU32(p+4);
726 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
733 gcm_init_8bit(ctx->Htable,ctx->H.u);
735 # if defined(GHASH_ASM_X86_OR_64)
736 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
737 if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
738 OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
739 if (((OPENSSL_ia32cap_P[1]>>22)&0x41)==0x41) { /* AVX+MOVBE */
740 gcm_init_avx(ctx->Htable,ctx->H.u);
741 ctx->gmult = gcm_gmult_avx;
742 ctx->ghash = gcm_ghash_avx;
744 gcm_init_clmul(ctx->Htable,ctx->H.u);
745 ctx->gmult = gcm_gmult_clmul;
746 ctx->ghash = gcm_ghash_clmul;
751 gcm_init_4bit(ctx->Htable,ctx->H.u);
752 # if defined(GHASH_ASM_X86) /* x86 only */
753 # if defined(OPENSSL_IA32_SSE2)
754 if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */
756 if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
758 ctx->gmult = gcm_gmult_4bit_mmx;
759 ctx->ghash = gcm_ghash_4bit_mmx;
761 ctx->gmult = gcm_gmult_4bit_x86;
762 ctx->ghash = gcm_ghash_4bit_x86;
765 ctx->gmult = gcm_gmult_4bit;
766 ctx->ghash = gcm_ghash_4bit;
768 # elif defined(GHASH_ASM_ARM)
769 if (OPENSSL_armcap_P & ARMV7_NEON) {
770 ctx->gmult = gcm_gmult_neon;
771 ctx->ghash = gcm_ghash_neon;
773 gcm_init_4bit(ctx->Htable,ctx->H.u);
774 ctx->gmult = gcm_gmult_4bit;
775 ctx->ghash = gcm_ghash_4bit;
777 # elif defined(GHASH_ASM_SPARC)
778 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
779 gcm_init_vis3(ctx->Htable,ctx->H.u);
780 ctx->gmult = gcm_gmult_vis3;
781 ctx->ghash = gcm_ghash_vis3;
783 gcm_init_4bit(ctx->Htable,ctx->H.u);
784 ctx->gmult = gcm_gmult_4bit;
785 ctx->ghash = gcm_ghash_4bit;
788 gcm_init_4bit(ctx->Htable,ctx->H.u);
793 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
795 const union { long one; char little; } is_endian = {1};
797 #ifdef GCM_FUNCREF_4BIT
798 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
805 ctx->len.u[0] = 0; /* AAD length */
806 ctx->len.u[1] = 0; /* message length */
811 memcpy(ctx->Yi.c,iv,12);
820 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
826 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
830 if (is_endian.little) {
832 ctx->Yi.u[1] ^= BSWAP8(len0);
834 ctx->Yi.c[8] ^= (u8)(len0>>56);
835 ctx->Yi.c[9] ^= (u8)(len0>>48);
836 ctx->Yi.c[10] ^= (u8)(len0>>40);
837 ctx->Yi.c[11] ^= (u8)(len0>>32);
838 ctx->Yi.c[12] ^= (u8)(len0>>24);
839 ctx->Yi.c[13] ^= (u8)(len0>>16);
840 ctx->Yi.c[14] ^= (u8)(len0>>8);
841 ctx->Yi.c[15] ^= (u8)(len0);
845 ctx->Yi.u[1] ^= len0;
849 if (is_endian.little)
851 ctr = BSWAP4(ctx->Yi.d[3]);
853 ctr = GETU32(ctx->Yi.c+12);
859 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
861 if (is_endian.little)
863 ctx->Yi.d[3] = BSWAP4(ctr);
865 PUTU32(ctx->Yi.c+12,ctr);
871 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
875 u64 alen = ctx->len.u[0];
876 #ifdef GCM_FUNCREF_4BIT
877 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
879 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
880 const u8 *inp,size_t len) = ctx->ghash;
884 if (ctx->len.u[1]) return -2;
887 if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
889 ctx->len.u[0] = alen;
894 ctx->Xi.c[n] ^= *(aad++);
898 if (n==0) GCM_MUL(ctx,Xi);
906 if ((i = (len&(size_t)-16))) {
913 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
920 n = (unsigned int)len;
921 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
928 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
929 const unsigned char *in, unsigned char *out,
932 const union { long one; char little; } is_endian = {1};
935 u64 mlen = ctx->len.u[1];
936 block128_f block = ctx->block;
937 void *key = ctx->key;
938 #ifdef GCM_FUNCREF_4BIT
939 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
941 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
942 const u8 *inp,size_t len) = ctx->ghash;
947 n = (unsigned int)mlen%16; /* alternative to ctx->mres */
950 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
952 ctx->len.u[1] = mlen;
955 /* First call to encrypt finalizes GHASH(AAD) */
960 if (is_endian.little)
962 ctr = BSWAP4(ctx->Yi.d[3]);
964 ctr = GETU32(ctx->Yi.c+12);
970 #if !defined(OPENSSL_SMALL_FOOTPRINT)
971 if (16%sizeof(size_t) == 0) do { /* always true actually */
974 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
978 if (n==0) GCM_MUL(ctx,Xi);
984 #if defined(STRICT_ALIGNMENT)
985 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
988 #if defined(GHASH) && defined(GHASH_CHUNK)
989 while (len>=GHASH_CHUNK) {
990 size_t j=GHASH_CHUNK;
993 size_t *out_t=(size_t *)out;
994 const size_t *in_t=(const size_t *)in;
996 (*block)(ctx->Yi.c,ctx->EKi.c,key);
998 if (is_endian.little)
1000 ctx->Yi.d[3] = BSWAP4(ctr);
1002 PUTU32(ctx->Yi.c+12,ctr);
1006 for (i=0; i<16/sizeof(size_t); ++i)
1007 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1012 GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
1015 if ((i = (len&(size_t)-16))) {
1019 size_t *out_t=(size_t *)out;
1020 const size_t *in_t=(const size_t *)in;
1022 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1024 if (is_endian.little)
1026 ctx->Yi.d[3] = BSWAP4(ctr);
1028 PUTU32(ctx->Yi.c+12,ctr);
1032 for (i=0; i<16/sizeof(size_t); ++i)
1033 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1042 size_t *out_t=(size_t *)out;
1043 const size_t *in_t=(const size_t *)in;
1045 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1047 if (is_endian.little)
1049 ctx->Yi.d[3] = BSWAP4(ctr);
1051 PUTU32(ctx->Yi.c+12,ctr);
1055 for (i=0; i<16/sizeof(size_t); ++i)
1057 out_t[i] = in_t[i]^ctx->EKi.t[i];
1065 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1067 if (is_endian.little)
1069 ctx->Yi.d[3] = BSWAP4(ctr);
1071 PUTU32(ctx->Yi.c+12,ctr);
1076 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1085 for (i=0;i<len;++i) {
1087 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1089 if (is_endian.little)
1091 ctx->Yi.d[3] = BSWAP4(ctr);
1093 PUTU32(ctx->Yi.c+12,ctr);
1098 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
1108 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1109 const unsigned char *in, unsigned char *out,
1112 const union { long one; char little; } is_endian = {1};
1113 unsigned int n, ctr;
1115 u64 mlen = ctx->len.u[1];
1116 block128_f block = ctx->block;
1117 void *key = ctx->key;
1118 #ifdef GCM_FUNCREF_4BIT
1119 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1121 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1122 const u8 *inp,size_t len) = ctx->ghash;
1127 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1129 ctx->len.u[1] = mlen;
1132 /* First call to decrypt finalizes GHASH(AAD) */
1137 if (is_endian.little)
1139 ctr = BSWAP4(ctx->Yi.d[3]);
1141 ctr = GETU32(ctx->Yi.c+12);
1147 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1148 if (16%sizeof(size_t) == 0) do { /* always true actually */
1152 *(out++) = c^ctx->EKi.c[n];
1157 if (n==0) GCM_MUL (ctx,Xi);
1163 #if defined(STRICT_ALIGNMENT)
1164 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1167 #if defined(GHASH) && defined(GHASH_CHUNK)
1168 while (len>=GHASH_CHUNK) {
1169 size_t j=GHASH_CHUNK;
1171 GHASH(ctx,in,GHASH_CHUNK);
1173 size_t *out_t=(size_t *)out;
1174 const size_t *in_t=(const size_t *)in;
1176 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1178 if (is_endian.little)
1180 ctx->Yi.d[3] = BSWAP4(ctr);
1182 PUTU32(ctx->Yi.c+12,ctr);
1186 for (i=0; i<16/sizeof(size_t); ++i)
1187 out_t[i] = in_t[i]^ctx->EKi.t[i];
1194 if ((i = (len&(size_t)-16))) {
1197 size_t *out_t=(size_t *)out;
1198 const size_t *in_t=(const size_t *)in;
1200 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1202 if (is_endian.little)
1204 ctx->Yi.d[3] = BSWAP4(ctr);
1206 PUTU32(ctx->Yi.c+12,ctr);
1210 for (i=0; i<16/sizeof(size_t); ++i)
1211 out_t[i] = in_t[i]^ctx->EKi.t[i];
1219 size_t *out_t=(size_t *)out;
1220 const size_t *in_t=(const size_t *)in;
1222 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1224 if (is_endian.little)
1226 ctx->Yi.d[3] = BSWAP4(ctr);
1228 PUTU32(ctx->Yi.c+12,ctr);
1232 for (i=0; i<16/sizeof(size_t); ++i) {
1234 out[i] = c^ctx->EKi.t[i];
1244 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1246 if (is_endian.little)
1248 ctx->Yi.d[3] = BSWAP4(ctr);
1250 PUTU32(ctx->Yi.c+12,ctr);
1257 out[n] = c^ctx->EKi.c[n];
1266 for (i=0;i<len;++i) {
1269 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1271 if (is_endian.little)
1273 ctx->Yi.d[3] = BSWAP4(ctr);
1275 PUTU32(ctx->Yi.c+12,ctr);
1281 out[i] = c^ctx->EKi.c[n];
1292 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1293 const unsigned char *in, unsigned char *out,
1294 size_t len, ctr128_f stream)
1296 const union { long one; char little; } is_endian = {1};
1297 unsigned int n, ctr;
1299 u64 mlen = ctx->len.u[1];
1300 void *key = ctx->key;
1301 #ifdef GCM_FUNCREF_4BIT
1302 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1304 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1305 const u8 *inp,size_t len) = ctx->ghash;
1310 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1312 ctx->len.u[1] = mlen;
1315 /* First call to encrypt finalizes GHASH(AAD) */
1320 if (is_endian.little)
1322 ctr = BSWAP4(ctx->Yi.d[3]);
1324 ctr = GETU32(ctx->Yi.c+12);
1332 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1336 if (n==0) GCM_MUL(ctx,Xi);
1342 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1343 while (len>=GHASH_CHUNK) {
1344 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1345 ctr += GHASH_CHUNK/16;
1346 if (is_endian.little)
1348 ctx->Yi.d[3] = BSWAP4(ctr);
1350 PUTU32(ctx->Yi.c+12,ctr);
1354 GHASH(ctx,out,GHASH_CHUNK);
1360 if ((i = (len&(size_t)-16))) {
1363 (*stream)(in,out,j,key,ctx->Yi.c);
1364 ctr += (unsigned int)j;
1365 if (is_endian.little)
1367 ctx->Yi.d[3] = BSWAP4(ctr);
1369 PUTU32(ctx->Yi.c+12,ctr);
1380 for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1387 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1389 if (is_endian.little)
1391 ctx->Yi.d[3] = BSWAP4(ctr);
1393 PUTU32(ctx->Yi.c+12,ctr);
1398 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1407 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1408 const unsigned char *in, unsigned char *out,
1409 size_t len,ctr128_f stream)
1411 const union { long one; char little; } is_endian = {1};
1412 unsigned int n, ctr;
1414 u64 mlen = ctx->len.u[1];
1415 void *key = ctx->key;
1416 #ifdef GCM_FUNCREF_4BIT
1417 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1419 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1420 const u8 *inp,size_t len) = ctx->ghash;
1425 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1427 ctx->len.u[1] = mlen;
1430 /* First call to decrypt finalizes GHASH(AAD) */
1435 if (is_endian.little)
1437 ctr = BSWAP4(ctx->Yi.d[3]);
1439 ctr = GETU32(ctx->Yi.c+12);
1448 *(out++) = c^ctx->EKi.c[n];
1453 if (n==0) GCM_MUL (ctx,Xi);
1459 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1460 while (len>=GHASH_CHUNK) {
1461 GHASH(ctx,in,GHASH_CHUNK);
1462 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1463 ctr += GHASH_CHUNK/16;
1464 if (is_endian.little)
1466 ctx->Yi.d[3] = BSWAP4(ctr);
1468 PUTU32(ctx->Yi.c+12,ctr);
1477 if ((i = (len&(size_t)-16))) {
1485 for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1492 (*stream)(in,out,j,key,ctx->Yi.c);
1493 ctr += (unsigned int)j;
1494 if (is_endian.little)
1496 ctx->Yi.d[3] = BSWAP4(ctr);
1498 PUTU32(ctx->Yi.c+12,ctr);
1507 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1509 if (is_endian.little)
1511 ctx->Yi.d[3] = BSWAP4(ctr);
1513 PUTU32(ctx->Yi.c+12,ctr);
1520 out[n] = c^ctx->EKi.c[n];
1529 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1532 const union { long one; char little; } is_endian = {1};
1533 u64 alen = ctx->len.u[0]<<3;
1534 u64 clen = ctx->len.u[1]<<3;
1535 #ifdef GCM_FUNCREF_4BIT
1536 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1539 if (ctx->mres || ctx->ares)
1542 if (is_endian.little) {
1544 alen = BSWAP8(alen);
1545 clen = BSWAP8(clen);
1549 ctx->len.u[0] = alen;
1550 ctx->len.u[1] = clen;
1552 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1553 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1557 ctx->Xi.u[0] ^= alen;
1558 ctx->Xi.u[1] ^= clen;
1561 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1562 ctx->Xi.u[1] ^= ctx->EK0.u[1];
1564 if (tag && len<=sizeof(ctx->Xi))
1565 return memcmp(ctx->Xi.c,tag,len);
1570 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1572 CRYPTO_gcm128_finish(ctx, NULL, 0);
1573 memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
1576 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1578 GCM128_CONTEXT *ret;
1580 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1581 CRYPTO_gcm128_init(ret,key,block);
1586 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1589 OPENSSL_cleanse(ctx,sizeof(*ctx));
1594 #if defined(SELFTEST)
1596 #include <openssl/aes.h>
1599 static const u8 K1[16],
1604 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
1610 static const u8 P2[16],
1611 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1612 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1616 static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1617 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1618 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1619 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1620 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1621 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1622 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1623 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1624 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1625 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1626 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
1631 static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1632 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1633 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1634 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1635 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1636 0xab,0xad,0xda,0xd2},
1637 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1638 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1639 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1640 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1641 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1647 static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1648 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1649 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1650 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1651 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1652 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
1658 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1659 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1660 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1661 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1662 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1663 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1664 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1665 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1666 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1669 static const u8 K7[24],
1674 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1680 static const u8 P8[16],
1681 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1682 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1686 static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1687 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1688 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1689 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1690 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1691 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1692 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1693 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1694 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1695 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1696 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1697 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1702 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1703 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1704 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1705 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1706 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1707 0xab,0xad,0xda,0xd2},
1708 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1709 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1710 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1711 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1712 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1718 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1719 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1720 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1721 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1722 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1723 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1729 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1730 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1731 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1732 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1733 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1734 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1735 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1736 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1737 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1740 static const u8 K13[32],
1745 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1750 static const u8 P14[16],
1752 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1753 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1757 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1758 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1759 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1760 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1761 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1762 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1763 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1764 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1765 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1766 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1767 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1768 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1773 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1774 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1775 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1776 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1777 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1778 0xab,0xad,0xda,0xd2},
1779 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1780 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1781 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1782 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1783 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1789 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1790 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1791 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1792 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1793 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1794 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1800 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1801 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1802 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1803 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1804 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1805 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1806 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1807 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1808 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1815 static const u8 A19[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1816 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1817 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1818 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
1819 0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1820 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1821 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1822 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1823 T19[]= {0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92};
1828 static const u8 IV20[64]={0xff,0xff,0xff,0xff}, /* this results in 0xff in counter LSB */
1830 C20[]= {0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
1831 0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
1832 0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
1833 0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
1834 0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
1835 0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
1836 0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
1837 0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
1838 0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
1839 0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
1840 0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
1841 0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
1842 0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
1843 0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
1844 0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
1845 0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
1846 0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
1847 0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c},
1848 T20[]= {0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f};
/*
 * Run vector set `n' in both directions and bump `ret' on any failure.
 *
 * `n' must be a plain integer literal: it is token-pasted onto K, IV, A, P,
 * C and T to select the vectors, and also printed with %d.  The macro is
 * deliberately unhygienic and expects `key', `ctx' and `ret' to be declared
 * in the scope where it is expanded.
 *
 * A##n, P##n and C##n may be NULL pointers, in which case the corresponding
 * step (AAD, encrypt/decrypt, output comparison) is skipped.  The context
 * is re-keyed once and the IV is reset before each direction.
 */
#define TEST_CASE(n)	do {					\
	u8 out[sizeof(P##n)];					\
	AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);		\
	CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);	\
	/* encrypt: P -> out, expect C and tag T */		\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));		\
	memset(out,0,sizeof(out));				\
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));	\
	if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));	\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||		\
	    (C##n && memcmp(out,C##n,sizeof(out)))) {		\
		ret++;						\
		printf ("encrypt test#%d failed.\n",n);		\
	}							\
	/* decrypt: C -> out, expect P and tag T */		\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));		\
	memset(out,0,sizeof(out));				\
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));	\
	if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));	\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||		\
	    (P##n && memcmp(out,P##n,sizeof(out)))) {		\
		ret++;						\
		printf ("decrypt test#%d failed.\n",n);		\
	}							\
	} while(0)
1897 #ifdef OPENSSL_CPUID_OBJ
1899 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1900 union { u64 u; u8 c[1024]; } buf;
1903 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1904 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1905 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1907 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1908 start = OPENSSL_rdtsc();
1909 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1910 gcm_t = OPENSSL_rdtsc() - start;
1912 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1913 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1914 (block128_f)AES_encrypt);
1915 start = OPENSSL_rdtsc();
1916 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1917 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1918 (block128_f)AES_encrypt);
1919 ctr_t = OPENSSL_rdtsc() - start;
1921 printf("%.2f-%.2f=%.2f\n",
1922 gcm_t/(double)sizeof(buf),
1923 ctr_t/(double)sizeof(buf),
1924 (gcm_t-ctr_t)/(double)sizeof(buf));
1927 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1928 const u8 *inp,size_t len) = ctx.ghash;
1930 GHASH((&ctx),buf.c,sizeof(buf));
1931 start = OPENSSL_rdtsc();
1932 for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
1933 gcm_t = OPENSSL_rdtsc() - start;
1934 printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);