1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
50 #define OPENSSL_FIPSAPI
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
63 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64 /* redefine, because alignment is ensured */
/* GETU32(p): load a u32 directly through a cast pointer and pass it through BSWAP4. */
66 #define GETU32(p) BSWAP4(*(const u32 *)(p))
/* PUTU32(p,v): pass v through BSWAP4 and store it directly through a cast pointer. */
68 #define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
/* PACK(s): move the constant s into the 16 most significant bits of a size_t. */
71 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * REDUCE1BIT(V): shift the 128-bit pair V.hi:V.lo right by one bit; when the
 * bit shifted out of V.lo was 1, XOR a constant with 0xe1 in its top byte
 * into V.hi. Two variants are visible: one builds the mask as a u64 (guarded
 * by sizeof(size_t)==8), the other builds it as a u32 and widens it afterwards.
 * NOTE(review): the else/closing lines of the macro are absent from this listing.
 */
72 #define REDUCE1BIT(V) do { \
73 if (sizeof(size_t)==8) { \
74 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75 V.lo = (V.hi<<63)|(V.lo>>1); \
76 V.hi = (V.hi>>1 )^T; \
79 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80 V.lo = (V.hi<<63)|(V.lo>>1); \
81 V.hi = (V.hi>>1 )^((u64)T<<32); \
86 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87 * never be set to 8. 8 is effectively reserved for testing purposes.
88 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90 * whole spectrum of possible table driven implementations. Why? In
91 * non-"Shoup's" case memory access pattern is segmented in such manner,
92 * that it's trivial to see that cache timing information can reveal
93 * fair portion of intermediate hash value. Given that ciphertext is
94 * always available to an attacker, it's possible for them to attempt to
95 * deduce the secret parameter H and, if successful, tamper with messages
96 * [which is trivial in CTR mode]. In "Shoup's" case it's
97 * not as trivial, but there is no reason to believe that it's resistant
98 * to cache-timing attack. And the thing about "8-bit" implementation is
99 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100 * key + 1KB shared. On the plus side, it should be twice as fast as
101 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102 * was observed to run ~75% faster, closer to 100% for commercial
103 * compilers... Yet "4-bit" procedure is preferred, because it's
104 * believed to provide better security-performance balance and adequate
105 * all-round performance. "All-round" refers to things like:
107 * - shorter setup time effectively improves overall timing for
108 * handling short messages;
109 * - larger table allocation can become unbearable because of VM
110 * subsystem penalties (for example, on Windows a large enough free()
111 * results in VM working set trimming, meaning that a subsequent
112 * malloc() would immediately incur working set expansion);
113 * - larger table has larger cache footprint, which can affect
114 * performance of other code paths (not necessarily even from same
115 * thread in Hyper-Threading world);
117 * Value of 1 is not appropriate for performance reasons.
/*
 * gcm_init_8bit: populate the 256-entry Htable used by the 8-bit multiply.
 * NOTE(review): declarations, braces and the initialisation of V are absent
 * from this listing; presumably V is derived from H -- confirm in the full file.
 */
121 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
/* Start at index 128 with V, then walk the power-of-two indices downward. */
131 for (Htable[128]=V, i=64; i>0; i>>=1) {
/*
 * For every power-of-two index i, fill entries i+1 .. 2*i-1 as the XOR of
 * entry i (H0) with the already computed entries 1 .. i-1.
 */
136 for (i=2; i<256; i<<=1) {
137 u128 *Hi = Htable+i, H0 = *Hi;
138 for (j=1; j<i; ++j) {
139 Hi[j].hi = H0.hi^Htable[j].hi;
140 Hi[j].lo = H0.lo^Htable[j].lo;
/*
 * gcm_gmult_8bit: update Xi in place using the 256-entry Htable, consuming
 * Xi one byte at a time starting from its last byte.
 * NOTE(review): several lines (declarations, loop header, braces) are absent
 * from this listing.
 */
145 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
/* xi starts at byte 15 of Xi; the loop below stops once it reaches byte 0. */
148 const u8 *xi = (const u8 *)Xi+15;
/* Run-time endianness probe: .little reads the first byte of the long 1. */
150 const union { long one; char little; } is_endian = {1};
/*
 * Reduction constants indexed by the byte shifted out of Z.lo; each 16-bit
 * value is PACK()-ed into the top bits of a size_t.
 */
152 static const size_t rem_8bit[256] = {
153 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
154 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
155 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
156 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
157 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
158 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
159 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
160 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
161 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
162 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
163 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
164 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
165 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
166 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
167 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
168 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
169 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
170 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
171 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
172 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
173 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
174 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
175 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
176 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
177 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
178 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
179 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
180 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
181 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
182 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
183 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
184 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
185 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
186 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
187 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
188 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
189 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
190 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
191 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
192 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
193 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
194 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
195 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
196 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
197 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
198 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
199 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
200 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
201 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
202 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
203 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
204 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
205 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
206 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
207 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
208 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
209 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
210 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
211 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
212 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
213 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
214 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
215 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
216 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
/* Accumulate the table entry selected by the current byte n into Z. */
219 Z.hi ^= Htable[n].hi;
220 Z.lo ^= Htable[n].lo;
/* All 16 bytes consumed once xi has walked back to the start of Xi. */
222 if ((u8 *)Xi==xi) break;
/* Shift Z right by 8 bits, remembering the byte that falls off the low end. */
226 rem = (size_t)Z.lo&0xff;
227 Z.lo = (Z.hi<<56)|(Z.lo>>8);
/*
 * Fold the reduction constant into the top of Z.hi. With a 64-bit size_t the
 * PACK()-ed value is already aligned; otherwise it is shifted up by 32 bits.
 */
229 if (sizeof(size_t)==8)
230 Z.hi ^= rem_8bit[rem];
232 Z.hi ^= (u64)rem_8bit[rem]<<32;
/* Store Z back into Xi: byte-swapped via BSWAP8, or word by word via PUTU32. */
235 if (is_endian.little) {
237 Xi[0] = BSWAP8(Z.hi);
238 Xi[1] = BSWAP8(Z.lo);
242 v = (u32)(Z.hi>>32); PUTU32(p,v);
243 v = (u32)(Z.hi); PUTU32(p+4,v);
244 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
245 v = (u32)(Z.lo); PUTU32(p+12,v);
/* GCM_MUL for the 8-bit build: multiply ctx->Xi in place via ctx->Htable. */
253 #define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
/*
 * gcm_init_4bit: populate the 16-entry Htable used by the 4-bit routines.
 * Two variants are visible: a loop-based one under OPENSSL_SMALL_FOOTPRINT and
 * an unrolled one.
 * NOTE(review): declarations, the lines that set V, and the #else/#endif
 * lines are absent from this listing.
 */
257 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
260 #if defined(OPENSSL_SMALL_FOOTPRINT)
269 #if defined(OPENSSL_SMALL_FOOTPRINT)
/* Loop variant: start at index 8 and walk the power-of-two indices downward. */
270 for (Htable[8]=V, i=4; i>0; i>>=1) {
/* Fill the non-power-of-two entries as XOR of entry i with entries 1 .. i-1. */
275 for (i=2; i<16; i<<=1) {
278 for (V=*Hi, j=1; j<i; ++j) {
279 Hi[j].hi = V.hi^Htable[j].hi;
280 Hi[j].lo = V.lo^Htable[j].lo;
/*
 * Unrolled variant: each entry below is the XOR of the current V with an
 * already filled lower entry (the assignments to V are not shown here).
 */
291 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
293 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
294 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
295 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
297 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
298 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
299 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
300 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
301 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
302 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
303 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
305 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
307 * ARM assembler expects specific dword order in Htable.
311 const union { long one; char little; } is_endian = {1};
313 if (is_endian.little)
/*
 * Visible reordering: swap the hi/lo halves of an entry and rotate each
 * 64-bit half by 32 bits. NOTE(review): which endianness branch these two
 * lines belong to cannot be determined from this listing.
 */
322 Htable[j].hi = V.lo<<32|V.lo>>32;
323 Htable[j].lo = V.hi<<32|V.hi>>32;
/*
 * rem_4bit: 16 reduction constants, indexed by the 4 bits shifted out of Z.lo
 * in the 4-bit routines; each 16-bit value is PACK()-ed into the top bits of
 * a size_t.
 */
331 static const size_t rem_4bit[16] = {
332 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
333 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
334 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
335 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
/*
 * gcm_gmult_4bit: update Xi in place using the 16-entry Htable, consuming Xi
 * four bits at a time starting from its last byte.
 * NOTE(review): declarations, the nibble-splitting lines, the loop header and
 * braces are absent from this listing.
 */
337 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
341 size_t rem, nlo, nhi;
/* Run-time endianness probe: .little reads the first byte of the long 1. */
342 const union { long one; char little; } is_endian = {1};
/* Begin with the last byte of Xi. */
344 nlo = ((const u8 *)Xi)[15];
/* Seed the accumulator Z with the table entry selected by nlo. */
348 Z.hi = Htable[nlo].hi;
349 Z.lo = Htable[nlo].lo;
/* Shift Z right by 4 bits, keeping the nibble that falls off the low end. */
352 rem = (size_t)Z.lo&0xf;
353 Z.lo = (Z.hi<<60)|(Z.lo>>4);
/* Fold the reduction constant into the top of Z.hi (shifted up when size_t is not 64-bit). */
355 if (sizeof(size_t)==8)
356 Z.hi ^= rem_4bit[rem];
358 Z.hi ^= (u64)rem_4bit[rem]<<32;
/* Accumulate the entry selected by nhi. */
360 Z.hi ^= Htable[nhi].hi;
361 Z.lo ^= Htable[nhi].lo;
/* Fetch the next byte of Xi (index cnt) and repeat the shift/reduce/accumulate step. */
365 nlo = ((const u8 *)Xi)[cnt];
369 rem = (size_t)Z.lo&0xf;
370 Z.lo = (Z.hi<<60)|(Z.lo>>4);
372 if (sizeof(size_t)==8)
373 Z.hi ^= rem_4bit[rem];
375 Z.hi ^= (u64)rem_4bit[rem]<<32;
377 Z.hi ^= Htable[nlo].hi;
378 Z.lo ^= Htable[nlo].lo;
/* Store Z back into Xi: byte-swapped via BSWAP8, or word by word via PUTU32. */
381 if (is_endian.little) {
383 Xi[0] = BSWAP8(Z.hi);
384 Xi[1] = BSWAP8(Z.lo);
388 v = (u32)(Z.hi>>32); PUTU32(p,v);
389 v = (u32)(Z.hi); PUTU32(p+4,v);
390 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
391 v = (u32)(Z.lo); PUTU32(p+12,v);
400 #if !defined(OPENSSL_SMALL_FOOTPRINT)
402 * Streamed gcm_gmult_4bit, see CRYPTO_gcm128_[en|de]crypt for
403 * details... Compiler-generated code doesn't seem to give any
404 * performance improvement, at least not on x86[_64]. It's here
405 * mostly as reference and a placeholder for possible future
406 * non-trivial optimization[s]...
/*
 * gcm_ghash_4bit: streamed form of the 4-bit multiply. Updates Xi in place
 * while consuming len bytes from inp in 16-byte steps. The closing loop
 * condition advances inp by 16 and stops when len-=16 reaches zero, so callers
 * are presumably expected to pass a non-zero multiple of 16 -- confirm.
 * Two variants are visible: one that mirrors the 4-bit nibble step with
 * rem_4bit, and one that pre-computes shifted copies of Htable (Hshr4/Hshl4)
 * and reduces with a 256-entry rem_8bit table.
 * NOTE(review): declarations, nibble-splitting lines, #if/#else lines and
 * braces are absent from this listing.
 */
408 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
409 const u8 *inp,size_t len)
413 size_t rem, nlo, nhi;
/* Run-time endianness probe: .little reads the first byte of the long 1. */
414 const union { long one; char little; } is_endian = {1};
/* First variant: start from the last byte of Xi. */
419 nlo = ((const u8 *)Xi)[15];
424 Z.hi = Htable[nlo].hi;
425 Z.lo = Htable[nlo].lo;
/* Shift Z right by 4 bits and fold in the rem_4bit constant for the lost nibble. */
428 rem = (size_t)Z.lo&0xf;
429 Z.lo = (Z.hi<<60)|(Z.lo>>4);
431 if (sizeof(size_t)==8)
432 Z.hi ^= rem_4bit[rem];
434 Z.hi ^= (u64)rem_4bit[rem]<<32;
436 Z.hi ^= Htable[nhi].hi;
437 Z.lo ^= Htable[nhi].lo;
/* Next byte of Xi (index cnt), same shift/reduce/accumulate step. */
441 nlo = ((const u8 *)Xi)[cnt];
446 rem = (size_t)Z.lo&0xf;
447 Z.lo = (Z.hi<<60)|(Z.lo>>4);
449 if (sizeof(size_t)==8)
450 Z.hi ^= rem_4bit[rem];
452 Z.hi ^= (u64)rem_4bit[rem]<<32;
454 Z.hi ^= Htable[nlo].hi;
455 Z.lo ^= Htable[nlo].lo;
459 * Extra 256+16 bytes per-key plus 512 bytes shared tables
460 * [should] give ~50% improvement... One could have PACK()-ed
461 * the rem_8bit even here, but the priority is to minimize
464 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
465 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
/* Un-PACK()-ed 16-bit reduction constants; shifted into place at the point of use. */
467 static const unsigned short rem_8bit[256] = {
468 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
469 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
470 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
471 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
472 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
473 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
474 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
475 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
476 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
477 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
478 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
479 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
480 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
481 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
482 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
483 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
484 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
485 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
486 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
487 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
488 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
489 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
490 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
491 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
492 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
493 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
494 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
495 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
496 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
497 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
498 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
499 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
501 * This pre-processing phase slows down procedure by approximately
502 * same time as it makes each loop spin faster. In other words
503 * single block performance is approximately same as straightforward
504 * "4-bit" implementation, and then it goes only faster...
/*
 * Pre-compute, for each Htable entry, the value shifted right by 4 bits
 * (Hshr4) and the low byte of the entry shifted left by 4 bits (Hshl4).
 */
506 for (cnt=0; cnt<16; ++cnt) {
507 Z.hi = Htable[cnt].hi;
508 Z.lo = Htable[cnt].lo;
509 Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
510 Hshr4[cnt].hi = (Z.hi>>4);
511 Hshl4[cnt] = (u8)(Z.lo<<4);
/* Bytes 15 down to 1 of Xi: one whole byte per iteration. */
515 for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
516 nlo = ((const u8 *)Xi)[cnt];
521 Z.hi ^= Htable[nlo].hi;
522 Z.lo ^= Htable[nlo].lo;
/* Shift Z right by a full byte, remembering the byte that falls off. */
524 rem = (size_t)Z.lo&0xff;
526 Z.lo = (Z.hi<<56)|(Z.lo>>8);
/* Accumulate the pre-shifted entry and reduce with a single rem_8bit lookup. */
529 Z.hi ^= Hshr4[nhi].hi;
530 Z.lo ^= Hshr4[nhi].lo;
531 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
/* Byte 0 of Xi is handled separately with a 4-bit shift. */
534 nlo = ((const u8 *)Xi)[0];
539 Z.hi ^= Htable[nlo].hi;
540 Z.lo ^= Htable[nlo].lo;
542 rem = (size_t)Z.lo&0xf;
544 Z.lo = (Z.hi<<60)|(Z.lo>>4);
547 Z.hi ^= Htable[nhi].hi;
548 Z.lo ^= Htable[nhi].lo;
549 Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
/* Store Z back into Xi: byte-swapped via BSWAP8, or word by word via PUTU32. */
552 if (is_endian.little) {
554 Xi[0] = BSWAP8(Z.hi);
555 Xi[1] = BSWAP8(Z.lo);
559 v = (u32)(Z.hi>>32); PUTU32(p,v);
560 v = (u32)(Z.hi); PUTU32(p+4,v);
561 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
562 v = (u32)(Z.lo); PUTU32(p+12,v);
/* Advance to the next 16-byte block; stop when len has been reduced to zero. */
569 } while (inp+=16, len-=16);
/*
 * Prototypes for the 4-bit multiply and hash routines.
 * NOTE(review): the preprocessor condition guarding these declarations is not
 * visible in this listing; presumably they cover externally provided
 * (assembler) implementations -- confirm.
 */
573 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
574 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
/* GCM_MUL for the 4-bit build: multiply ctx->Xi in place via ctx->Htable. */
577 #define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
578 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
/* GHASH: hash len bytes at in into ctx->Xi using gcm_ghash_4bit. */
579 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
580 /* GHASH_CHUNK is a "stride parameter" meant to mitigate the cache
581 * thrashing effect. In other words, the idea is to hash data while it's
582 * still in L1 cache after the encryption pass... */
583 #define GHASH_CHUNK (3*1024)
586 #else /* TABLE_BITS */
/*
 * gcm_gmult_1bit: table-free multiply that updates Xi in place using H,
 * processing Xi one bit at a time.
 * NOTE(review): declarations, the inner-loop body and braces are absent from
 * this listing.
 */
588 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
/* Xi is read in long-sized words. */
593 const long *xi = (const long *)Xi;
/* Run-time endianness probe: .little reads the first byte of the long 1. */
594 const union { long one; char little; } is_endian = {1};
596 V.hi = H[0]; /* H is in host byte order, no byte swapping */
/* One iteration per long-sized word of the 16-byte Xi. */
599 for (j=0; j<16/sizeof(long); ++j) {
/* Load the word into X, byte-swapping where the visible branches require it. */
600 if (is_endian.little) {
601 if (sizeof(long)==8) {
603 X = (long)(BSWAP8(xi[j]));
605 const u8 *p = (const u8 *)(xi+j);
606 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
610 const u8 *p = (const u8 *)(xi+j);
/*
 * One iteration per bit of X, most significant bit first: M is formed by
 * shifting the signed X right so that its top bit is replicated.
 */
617 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
618 u64 M = (u64)(X>>(8*sizeof(long)-1));
/* Store Z back into Xi: byte-swapped via BSWAP8, or word by word via PUTU32. */
626 if (is_endian.little) {
628 Xi[0] = BSWAP8(Z.hi);
629 Xi[1] = BSWAP8(Z.lo);
633 v = (u32)(Z.hi>>32); PUTU32(p,v);
634 v = (u32)(Z.hi); PUTU32(p+4,v);
635 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
636 v = (u32)(Z.lo); PUTU32(p+12,v);
/* GCM_MUL for the 1-bit build: multiply ctx->Xi in place by ctx->H. */
644 #define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/*
 * Declarations for platform-specific GHASH back ends. Only considered for the
 * 4-bit table build when GHASH_ASM or OPENSSL_CPUID_OBJ is defined. Each
 * platform branch defines GCM_FUNCREF_4BIT, which switches GCM_MUL/GHASH to
 * function-pointer dispatch (see the end of this block).
 * NOTE(review): #else/#endif lines are absent from this listing.
 */
648 #if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
/* x86 / x86-64 */
649 # if !defined(I386_ONLY) && \
650 (defined(__i386) || defined(__i386__) || \
651 defined(__x86_64) || defined(__x86_64__) || \
652 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
653 # define GHASH_ASM_X86_OR_64
654 # define GCM_FUNCREF_4BIT
/* CPU capability words tested in CRYPTO_gcm128_init. */
655 extern unsigned int OPENSSL_ia32cap_P[2];
657 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
658 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
659 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
/* On 32-bit x86 the *_avx names are aliases for the *_clmul routines. */
661 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
662 # define gcm_init_avx gcm_init_clmul
663 # define gcm_gmult_avx gcm_gmult_clmul
664 # define gcm_ghash_avx gcm_ghash_clmul
666 void gcm_init_avx(u128 Htable[16],const u64 Xi[2]);
667 void gcm_gmult_avx(u64 Xi[2],const u128 Htable[16]);
668 void gcm_ghash_avx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
/* 32-bit x86 only: MMX and plain x86 variants of the 4-bit routines. */
671 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
672 # define GHASH_ASM_X86
673 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
674 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
676 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
677 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
/* ARM / AArch64 */
679 # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
680 # include "arm_arch.h"
682 # define GHASH_ASM_ARM
683 # define GCM_FUNCREF_4BIT
/* Capability tests against OPENSSL_armcap_P. */
684 # define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
685 # if defined(__arm__) || defined(__arm)
686 # define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
688 void gcm_init_neon(u128 Htable[16],const u64 Xi[2]);
689 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
690 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
691 void gcm_init_v8(u128 Htable[16],const u64 Xi[2]);
692 void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]);
693 void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
/* SPARC */
695 # elif defined(__sparc__) || defined(__sparc)
696 # include "sparc_arch.h"
697 # define GHASH_ASM_SPARC
698 # define GCM_FUNCREF_4BIT
699 extern unsigned int OPENSSL_sparcv9cap_P[];
700 void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
701 void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
702 void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
/*
 * With GCM_FUNCREF_4BIT, GCM_MUL and GHASH call through the local function
 * pointers gcm_gmult_p / gcm_ghash_p, which each API function loads from
 * ctx->gmult / ctx->ghash.
 */
706 #ifdef GCM_FUNCREF_4BIT
708 # define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
711 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
/*
 * CRYPTO_gcm128_init: reset the context, derive H by running the block
 * function over ctx->H.c in place, convert H to host byte order, and build
 * Htable with whichever back end is selected (setting ctx->gmult/ctx->ghash
 * where function-pointer dispatch is used).
 * NOTE(review): several lines (assignments of block/key into ctx, #else/#endif
 * lines, braces) are absent from this listing.
 */
715 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
/* Run-time endianness probe: .little reads the first byte of the long 1. */
717 const union { long one; char little; } is_endian = {1};
/* Start from an all-zero context. */
719 memset(ctx,0,sizeof(*ctx));
/* Encrypt ctx->H.c in place with the supplied block function to obtain H. */
723 (*block)(ctx->H.c,ctx->H.c,key);
725 if (is_endian.little) {
726 /* H is stored in host byte order */
728 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
729 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
/* Variant without BSWAP8: assemble the two halves from big-endian 32-bit words. */
733 hi = (u64)GETU32(p) <<32|GETU32(p+4);
734 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
/* 8-bit table build. */
741 gcm_init_8bit(ctx->Htable,ctx->H.u);
/* 4-bit table build on x86/x86-64: prefer carry-less multiply when available. */
743 # if defined(GHASH_ASM_X86_OR_64)
744 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
745 if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
746 OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
747 if (((OPENSSL_ia32cap_P[1]>>22)&0x41)==0x41) { /* AVX+MOVBE */
748 gcm_init_avx(ctx->Htable,ctx->H.u);
749 ctx->gmult = gcm_gmult_avx;
750 ctx->ghash = gcm_ghash_avx;
752 gcm_init_clmul(ctx->Htable,ctx->H.u);
753 ctx->gmult = gcm_gmult_clmul;
754 ctx->ghash = gcm_ghash_clmul;
/* Table-driven 4-bit setup, paired with an MMX, plain x86 or generic routine. */
759 gcm_init_4bit(ctx->Htable,ctx->H.u);
760 # if defined(GHASH_ASM_X86) /* x86 only */
761 # if defined(OPENSSL_IA32_SSE2)
762 if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */
764 if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
766 ctx->gmult = gcm_gmult_4bit_mmx;
767 ctx->ghash = gcm_ghash_4bit_mmx;
769 ctx->gmult = gcm_gmult_4bit_x86;
770 ctx->ghash = gcm_ghash_4bit_x86;
773 ctx->gmult = gcm_gmult_4bit;
774 ctx->ghash = gcm_ghash_4bit;
/* ARM: v8 routines, NEON routines, or the generic 4-bit routines. */
776 # elif defined(GHASH_ASM_ARM)
777 # ifdef PMULL_CAPABLE
779 gcm_init_v8(ctx->Htable,ctx->H.u);
780 ctx->gmult = gcm_gmult_v8;
781 ctx->ghash = gcm_ghash_v8;
786 gcm_init_neon(ctx->Htable,ctx->H.u);
787 ctx->gmult = gcm_gmult_neon;
788 ctx->ghash = gcm_ghash_neon;
792 gcm_init_4bit(ctx->Htable,ctx->H.u);
793 ctx->gmult = gcm_gmult_4bit;
794 ctx->ghash = gcm_ghash_4bit;
/* SPARC: VIS3 routines when the capability bit is set, else generic 4-bit. */
796 # elif defined(GHASH_ASM_SPARC)
797 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
798 gcm_init_vis3(ctx->Htable,ctx->H.u);
799 ctx->gmult = gcm_gmult_vis3;
800 ctx->ghash = gcm_ghash_vis3;
802 gcm_init_4bit(ctx->Htable,ctx->H.u);
803 ctx->gmult = gcm_gmult_4bit;
804 ctx->ghash = gcm_ghash_4bit;
/* No platform back end: plain 4-bit table setup. */
807 gcm_init_4bit(ctx->Htable,ctx->H.u);
/*
 * CRYPTO_gcm128_setiv: reset the per-message lengths, derive the initial
 * counter block Yi from the IV, compute EK0 by encrypting Yi, and store the
 * updated 32-bit counter back into the last four bytes of Yi.
 * NOTE(review): the branch conditions that select between the memcpy path and
 * the XOR/hash path, the counter update, and other lines are absent from this
 * listing.
 */
812 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
/* Run-time endianness probe: .little reads the first byte of the long 1. */
814 const union { long one; char little; } is_endian = {1};
816 #ifdef GCM_FUNCREF_4BIT
/* Local function pointer used by the GCM_MUL macro. */
817 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
824 ctx->len.u[0] = 0; /* AAD length */
825 ctx->len.u[1] = 0; /* message length */
/* Copy 12 IV bytes straight into Yi (presumably the 96-bit IV case -- confirm). */
830 memcpy(ctx->Yi.c,iv,12);
/* Otherwise the IV is XOR-ed into Yi, 16 bytes at a time ... */
839 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
/* ... followed by any remaining len bytes. */
845 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
/* XOR len0 into the last eight bytes of Yi in big-endian byte order. */
849 if (is_endian.little) {
851 ctx->Yi.u[1] ^= BSWAP8(len0);
853 ctx->Yi.c[8] ^= (u8)(len0>>56);
854 ctx->Yi.c[9] ^= (u8)(len0>>48);
855 ctx->Yi.c[10] ^= (u8)(len0>>40);
856 ctx->Yi.c[11] ^= (u8)(len0>>32);
857 ctx->Yi.c[12] ^= (u8)(len0>>24);
858 ctx->Yi.c[13] ^= (u8)(len0>>16);
859 ctx->Yi.c[14] ^= (u8)(len0>>8);
860 ctx->Yi.c[15] ^= (u8)(len0);
864 ctx->Yi.u[1] ^= len0;
/* Read the 32-bit counter from the last four bytes of Yi. */
868 if (is_endian.little)
870 ctr = BSWAP4(ctx->Yi.d[3]);
872 ctr = GETU32(ctx->Yi.c+12);
/* EK0 = block-encrypt(Yi); CRYPTO_gcm128_finish XORs EK0 into Xi. */
878 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
/* Write the counter back into Yi. */
880 if (is_endian.little)
882 ctx->Yi.d[3] = BSWAP4(ctr);
884 PUTU32(ctx->Yi.c+12,ctr);
/*
 * CRYPTO_gcm128_aad: absorb len bytes of additional authenticated data into
 * the hash state ctx->Xi and add len to the stored AAD length. Returns -2 if
 * message data has already been processed (ctx->len.u[1] != 0).
 * NOTE(review): other return statements, the handling of partial-block state,
 * and several lines are absent from this listing.
 */
890 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
894 u64 alen = ctx->len.u[0];
895 #ifdef GCM_FUNCREF_4BIT
/* Local function pointers used by the GCM_MUL / GHASH macros. */
896 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
898 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
899 const u8 *inp,size_t len) = ctx->ghash;
/* AAD may not be supplied once message bytes have been counted. */
903 if (ctx->len.u[1]) return -2;
/* Enforce the 2^61-byte AAD limit and detect wrap-around of the running total. */
906 if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
908 ctx->len.u[0] = alen;
/* Complete a partially filled block byte by byte; multiply when n wraps to 0. */
913 ctx->Xi.c[n] ^= *(aad++);
917 if (n==0) GCM_MUL(ctx,Xi);
/* i = len rounded down to a multiple of 16: the whole-block portion. */
925 if ((i = (len&(size_t)-16))) {
932 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
/* Trailing partial block: XOR the remaining bytes in and remember how many. */
939 n = (unsigned int)len;
940 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
/*
 * CRYPTO_gcm128_encrypt: counter-mode encrypt in -> out with the context's
 * block function while folding the produced ciphertext into the hash state
 * ctx->Xi. The visible code contains a word-at-a-time fast path (size_t
 * words, optionally chunked by GHASH_CHUNK) and byte-at-a-time paths.
 * NOTE(review): the trailing parameter, return statements, counter
 * increments, GHASH/GCM_MUL calls inside the loops, and #else/#endif lines
 * are absent from this listing.
 */
947 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
948 const unsigned char *in, unsigned char *out,
/* Run-time endianness probe: .little reads the first byte of the long 1. */
951 const union { long one; char little; } is_endian = {1};
954 u64 mlen = ctx->len.u[1];
955 block128_f block = ctx->block;
956 void *key = ctx->key;
957 #ifdef GCM_FUNCREF_4BIT
/* Local function pointers used by the GCM_MUL / GHASH macros. */
958 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
960 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
961 const u8 *inp,size_t len) = ctx->ghash;
966 n = (unsigned int)mlen%16; /* alternative to ctx->mres */
/* Enforce the (2^36 - 32)-byte message limit and detect wrap-around of the total. */
969 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
971 ctx->len.u[1] = mlen;
974 /* First call to encrypt finalizes GHASH(AAD) */
/* Read the 32-bit counter from the last four bytes of Yi. */
979 if (is_endian.little)
981 ctr = BSWAP4(ctx->Yi.d[3]);
983 ctr = GETU32(ctx->Yi.c+12);
989 #if !defined(OPENSSL_SMALL_FOOTPRINT)
990 if (16%sizeof(size_t) == 0) do { /* always true actually */
/* Finish a partially used keystream block: emit ciphertext and hash it in one step. */
993 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
997 if (n==0) GCM_MUL(ctx,Xi);
/* Under STRICT_ALIGNMENT the word-wise path is conditional on in/out alignment. */
1003 #if defined(STRICT_ALIGNMENT)
1004 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1007 #if defined(GHASH) && defined(GHASH_CHUNK)
/* Encrypt GHASH_CHUNK bytes, then hash that chunk of ciphertext. */
1008 while (len>=GHASH_CHUNK) {
1009 size_t j=GHASH_CHUNK;
1012 size_t *out_t=(size_t *)out;
1013 const size_t *in_t=(const size_t *)in;
/* Keystream block EKi = block-encrypt(Yi); then store the counter back into Yi. */
1015 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1017 if (is_endian.little)
1019 ctx->Yi.d[3] = BSWAP4(ctr);
1021 PUTU32(ctx->Yi.c+12,ctr);
/* XOR one 16-byte block, a size_t word at a time. */
1025 for (i=0; i<16/sizeof(size_t); ++i)
1026 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
/* out has advanced past the chunk, so hash the GHASH_CHUNK bytes just written. */
1031 GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
/* Remaining whole blocks (len rounded down to a multiple of 16). */
1034 if ((i = (len&(size_t)-16))) {
1038 size_t *out_t=(size_t *)out;
1039 const size_t *in_t=(const size_t *)in;
1041 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1043 if (is_endian.little)
1045 ctx->Yi.d[3] = BSWAP4(ctr);
1047 PUTU32(ctx->Yi.c+12,ctr);
1051 for (i=0; i<16/sizeof(size_t); ++i)
1052 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
/* Variant of the whole-block loop used when GHASH/GHASH_CHUNK are not both defined (see the #if above). */
1061 size_t *out_t=(size_t *)out;
1062 const size_t *in_t=(const size_t *)in;
1064 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1066 if (is_endian.little)
1068 ctx->Yi.d[3] = BSWAP4(ctr);
1070 PUTU32(ctx->Yi.c+12,ctr);
1074 for (i=0; i<16/sizeof(size_t); ++i)
1076 out_t[i] = in_t[i]^ctx->EKi.t[i];
/* Trailing partial block: fresh keystream block, then byte-wise XOR and hash. */
1084 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1086 if (is_endian.little)
1088 ctx->Yi.d[3] = BSWAP4(ctr);
1090 PUTU32(ctx->Yi.c+12,ctr);
1095 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
/* Byte-at-a-time path over the whole input. */
1104 for (i=0;i<len;++i) {
1106 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1108 if (is_endian.little)
1110 ctx->Yi.d[3] = BSWAP4(ctr);
1112 PUTU32(ctx->Yi.c+12,ctr);
1117 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt: counter-mode decrypt in -> out with the context's
 * block function while folding the ciphertext (the input) into the hash state
 * ctx->Xi. Mirrors the encrypt routine; in the chunked path the visible GHASH
 * call takes the input pointer and precedes the keystream XOR.
 * NOTE(review): the trailing parameter, return statements, counter
 * increments, several hash-update lines, and #else/#endif lines are absent
 * from this listing.
 */
1127 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1128 const unsigned char *in, unsigned char *out,
/* Run-time endianness probe: .little reads the first byte of the long 1. */
1131 const union { long one; char little; } is_endian = {1};
1132 unsigned int n, ctr;
1134 u64 mlen = ctx->len.u[1];
1135 block128_f block = ctx->block;
1136 void *key = ctx->key;
1137 #ifdef GCM_FUNCREF_4BIT
/* Local function pointers used by the GCM_MUL / GHASH macros. */
1138 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1140 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1141 const u8 *inp,size_t len) = ctx->ghash;
/* Enforce the (2^36 - 32)-byte message limit and detect wrap-around of the total. */
1146 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1148 ctx->len.u[1] = mlen;
1151 /* First call to decrypt finalizes GHASH(AAD) */
/* Read the 32-bit counter from the last four bytes of Yi. */
1156 if (is_endian.little)
1158 ctr = BSWAP4(ctx->Yi.d[3]);
1160 ctr = GETU32(ctx->Yi.c+12);
1166 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1167 if (16%sizeof(size_t) == 0) do { /* always true actually */
/* Finish a partially used keystream block: c is the ciphertext byte (loaded on a line not shown). */
1171 *(out++) = c^ctx->EKi.c[n];
1176 if (n==0) GCM_MUL (ctx,Xi);
/* Under STRICT_ALIGNMENT the word-wise path is conditional on in/out alignment. */
1182 #if defined(STRICT_ALIGNMENT)
1183 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1186 #if defined(GHASH) && defined(GHASH_CHUNK)
1187 while (len>=GHASH_CHUNK) {
1188 size_t j=GHASH_CHUNK;
/* Hash the ciphertext chunk before it is decrypted. */
1190 GHASH(ctx,in,GHASH_CHUNK);
1192 size_t *out_t=(size_t *)out;
1193 const size_t *in_t=(const size_t *)in;
/* Keystream block EKi = block-encrypt(Yi); then store the counter back into Yi. */
1195 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1197 if (is_endian.little)
1199 ctx->Yi.d[3] = BSWAP4(ctr);
1201 PUTU32(ctx->Yi.c+12,ctr);
/* XOR one 16-byte block, a size_t word at a time. */
1205 for (i=0; i<16/sizeof(size_t); ++i)
1206 out_t[i] = in_t[i]^ctx->EKi.t[i];
/* Remaining whole blocks (len rounded down to a multiple of 16). */
1213 if ((i = (len&(size_t)-16))) {
1216 size_t *out_t=(size_t *)out;
1217 const size_t *in_t=(const size_t *)in;
1219 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1221 if (is_endian.little)
1223 ctx->Yi.d[3] = BSWAP4(ctr);
1225 PUTU32(ctx->Yi.c+12,ctr);
1229 for (i=0; i<16/sizeof(size_t); ++i)
1230 out_t[i] = in_t[i]^ctx->EKi.t[i];
/* Variant of the whole-block loop used when GHASH/GHASH_CHUNK are not both defined (see the #if above). */
1238 size_t *out_t=(size_t *)out;
1239 const size_t *in_t=(const size_t *)in;
1241 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1243 if (is_endian.little)
1245 ctx->Yi.d[3] = BSWAP4(ctr);
1247 PUTU32(ctx->Yi.c+12,ctr);
/*
 * NOTE(review): this line indexes `out` (declared unsigned char *) while the
 * matching encrypt loop indexes out_t; the load of c is not visible here, so
 * whether this is intended cannot be confirmed from this listing.
 */
1251 for (i=0; i<16/sizeof(size_t); ++i) {
1253 out[i] = c^ctx->EKi.t[i];
/* Trailing partial block: fresh keystream block, then byte-wise XOR. */
1263 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1265 if (is_endian.little)
1267 ctx->Yi.d[3] = BSWAP4(ctr);
1269 PUTU32(ctx->Yi.c+12,ctr);
1276 out[n] = c^ctx->EKi.c[n];
/* Byte-at-a-time path over the whole input. */
1285 for (i=0;i<len;++i) {
1288 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1290 if (is_endian.little)
1292 ctx->Yi.d[3] = BSWAP4(ctr);
1294 PUTU32(ctx->Yi.c+12,ctr);
1300 out[i] = c^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_encrypt_ctr32: like CRYPTO_gcm128_encrypt, but whole blocks
 * are encrypted by the caller-supplied stream function, which is invoked with
 * (in, out, number of 16-byte blocks, key, counter block). The local 32-bit
 * counter is advanced by the same block count and written back into Yi.
 * NOTE(review): return statements, pointer/length advances, some hash-update
 * lines, and #else/#endif lines are absent from this listing.
 */
1311 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1312 const unsigned char *in, unsigned char *out,
1313 size_t len, ctr128_f stream)
/* Run-time endianness probe: .little reads the first byte of the long 1. */
1315 const union { long one; char little; } is_endian = {1};
1316 unsigned int n, ctr;
1318 u64 mlen = ctx->len.u[1];
1319 void *key = ctx->key;
1320 #ifdef GCM_FUNCREF_4BIT
/* Local function pointers used by the GCM_MUL / GHASH macros. */
1321 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1323 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1324 const u8 *inp,size_t len) = ctx->ghash;
/* Enforce the (2^36 - 32)-byte message limit and detect wrap-around of the total. */
1329 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1331 ctx->len.u[1] = mlen;
1334 /* First call to encrypt finalizes GHASH(AAD) */
/* Read the 32-bit counter from the last four bytes of Yi. */
1339 if (is_endian.little)
1341 ctr = BSWAP4(ctx->Yi.d[3]);
1343 ctr = GETU32(ctx->Yi.c+12);
/* Finish a partially used keystream block: emit ciphertext and hash it in one step. */
1351 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1355 if (n==0) GCM_MUL(ctx,Xi);
1361 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
/* Encrypt GHASH_CHUNK bytes with the stream function, then hash the ciphertext. */
1362 while (len>=GHASH_CHUNK) {
1363 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1364 ctr += GHASH_CHUNK/16;
1365 if (is_endian.little)
1367 ctx->Yi.d[3] = BSWAP4(ctr);
1369 PUTU32(ctx->Yi.c+12,ctr);
1373 GHASH(ctx,out,GHASH_CHUNK);
/* Remaining whole blocks: j blocks in a single stream call. */
1379 if ((i = (len&(size_t)-16))) {
1382 (*stream)(in,out,j,key,ctx->Yi.c);
1383 ctr += (unsigned int)j;
1384 if (is_endian.little)
1386 ctx->Yi.d[3] = BSWAP4(ctr);
1388 PUTU32(ctx->Yi.c+12,ctr);
/* Hashing of a ciphertext block by XOR into Xi (the GHASH-less variant). */
1399 for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
/* Trailing partial block uses the context's single-block function. */
1406 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1408 if (is_endian.little)
1410 ctx->Yi.d[3] = BSWAP4(ctr);
1412 PUTU32(ctx->Yi.c+12,ctr);
1417 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt_ctr32: like CRYPTO_gcm128_decrypt, but whole blocks
 * are decrypted by the caller-supplied stream function, which is invoked with
 * (in, out, number of 16-byte blocks, key, counter block). In the chunked
 * path the ciphertext is hashed before the stream call.
 * NOTE(review): return statements, pointer/length advances, some hash-update
 * lines, and #else/#endif lines are absent from this listing.
 */
1426 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1427 const unsigned char *in, unsigned char *out,
1428 size_t len,ctr128_f stream)
/* Run-time endianness probe: .little reads the first byte of the long 1. */
1430 const union { long one; char little; } is_endian = {1};
1431 unsigned int n, ctr;
1433 u64 mlen = ctx->len.u[1];
1434 void *key = ctx->key;
1435 #ifdef GCM_FUNCREF_4BIT
/* Local function pointers used by the GCM_MUL / GHASH macros. */
1436 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1438 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1439 const u8 *inp,size_t len) = ctx->ghash;
/* Enforce the (2^36 - 32)-byte message limit and detect wrap-around of the total. */
1444 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1446 ctx->len.u[1] = mlen;
1449 /* First call to decrypt finalizes GHASH(AAD) */
/* Read the 32-bit counter from the last four bytes of Yi. */
1454 if (is_endian.little)
1456 ctr = BSWAP4(ctx->Yi.d[3]);
1458 ctr = GETU32(ctx->Yi.c+12);
/* Finish a partially used keystream block: c is the ciphertext byte (loaded on a line not shown). */
1467 *(out++) = c^ctx->EKi.c[n];
1472 if (n==0) GCM_MUL (ctx,Xi);
1478 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
/* Hash GHASH_CHUNK bytes of ciphertext, then decrypt them with the stream function. */
1479 while (len>=GHASH_CHUNK) {
1480 GHASH(ctx,in,GHASH_CHUNK);
1481 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1482 ctr += GHASH_CHUNK/16;
1483 if (is_endian.little)
1485 ctx->Yi.d[3] = BSWAP4(ctr);
1487 PUTU32(ctx->Yi.c+12,ctr);
/* Remaining whole blocks (len rounded down to a multiple of 16). */
1496 if ((i = (len&(size_t)-16))) {
/* Hashing of a ciphertext block by XOR into Xi (the GHASH-less variant). */
1504 for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
/* Decrypt j blocks in a single stream call and advance the counter by j. */
1511 (*stream)(in,out,j,key,ctx->Yi.c);
1512 ctr += (unsigned int)j;
1513 if (is_endian.little)
1515 ctx->Yi.d[3] = BSWAP4(ctr);
1517 PUTU32(ctx->Yi.c+12,ctr);
/* Trailing partial block uses the context's single-block function. */
1526 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1528 if (is_endian.little)
1530 ctx->Yi.d[3] = BSWAP4(ctr);
1532 PUTU32(ctx->Yi.c+12,ctr);
1539 out[n] = c^ctx->EKi.c[n];
1548 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1551 const union { long one; char little; } is_endian = {1};
1552 u64 alen = ctx->len.u[0]<<3;
1553 u64 clen = ctx->len.u[1]<<3;
1554 #ifdef GCM_FUNCREF_4BIT
1555 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1558 if (ctx->mres || ctx->ares)
1561 if (is_endian.little) {
1563 alen = BSWAP8(alen);
1564 clen = BSWAP8(clen);
1568 ctx->len.u[0] = alen;
1569 ctx->len.u[1] = clen;
1571 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1572 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1576 ctx->Xi.u[0] ^= alen;
1577 ctx->Xi.u[1] ^= clen;
1580 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1581 ctx->Xi.u[1] ^= ctx->EK0.u[1];
1583 if (tag && len<=sizeof(ctx->Xi))
1584 return memcmp(ctx->Xi.c,tag,len);
1589 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1591 CRYPTO_gcm128_finish(ctx, NULL, 0);
1592 memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
1595 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1597 GCM128_CONTEXT *ret;
1599 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1600 CRYPTO_gcm128_init(ret,key,block);
/*
 * CRYPTO_gcm128_release - dispose of a GCM context.
 *
 * ctx: context to dispose of.
 *
 * The whole structure is overwritten with OPENSSL_cleanse() so that its
 * contents do not remain readable in memory.
 * NOTE(review): confirm how a NULL ctx and the deallocation of the
 * structure are handled around this call.
 */
1605 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1608 OPENSSL_cleanse(ctx,sizeof(*ctx));
1613 #if defined(SELFTEST)
1615 #include <openssl/aes.h>
/* Test Case 1 vectors: K1 is a 16-byte key left zero-initialised (static
 * storage, no initialiser); T1 is the expected 16-byte tag. */
1618 static const u8 K1[16],
1623 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
/* Test Case 2 vectors: P2 is a 16-byte zero-initialised plaintext, C2 the
 * expected ciphertext and T2 the expected 16-byte tag. */
1629 static const u8 P2[16],
1630 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1631 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
/* Test Case 3 vectors: 16-byte key K3, 64-byte plaintext P3, 12-byte IV3,
 * expected 64-byte ciphertext C3 and expected 16-byte tag T3. */
1635 static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1636 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1637 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1638 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1639 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1640 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1641 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1642 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1643 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1644 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1645 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
/* Test Case 4 vectors: 60-byte plaintext P4, 20-byte additional
 * authenticated data A4, expected 60-byte ciphertext C4 and expected
 * 16-byte tag T4. */
1650 static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1651 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1652 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1653 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1654 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1655 0xab,0xad,0xda,0xd2},
1656 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1657 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1658 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1659 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1660 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
/* Test Case 5 vectors: 8-byte IV5, expected 60-byte ciphertext C5 and
 * expected 16-byte tag T5. */
1666 static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1667 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1668 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1669 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1670 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1671 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
/* Test Case 6 vectors: 60-byte IV6, expected 60-byte ciphertext C6 and
 * expected 16-byte tag T6. */
1677 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1678 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1679 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1680 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1681 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1682 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1683 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1684 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1685 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
/* Test Case 7 vectors: K7 is a 24-byte key left zero-initialised (static
 * storage, no initialiser); T7 is the expected 16-byte tag. */
1688 static const u8 K7[24],
1693 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
/* Test Case 8 vectors: P8 is a 16-byte zero-initialised plaintext, C8 the
 * expected ciphertext and T8 the expected 16-byte tag. */
1699 static const u8 P8[16],
1700 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1701 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
/* Test Case 9 vectors: 24-byte key K9, 64-byte plaintext P9, 12-byte IV9,
 * expected 64-byte ciphertext C9 and expected 16-byte tag T9. */
1705 static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1706 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1707 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1708 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1709 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1710 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1711 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1712 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1713 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1714 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1715 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1716 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
/* Test Case 10 vectors: 60-byte plaintext P10, 20-byte additional
 * authenticated data A10, expected 60-byte ciphertext C10 and expected
 * 16-byte tag T10. */
1721 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1722 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1723 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1724 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1725 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1726 0xab,0xad,0xda,0xd2},
1727 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1728 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1729 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1730 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1731 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
/* Test Case 11 vectors: 8-byte IV11, expected 60-byte ciphertext C11 and
 * expected 16-byte tag T11. */
1737 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1738 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1739 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1740 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1741 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1742 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
/* Test Case 12 vectors: 60-byte IV12, expected 60-byte ciphertext C12 and
 * expected 16-byte tag T12. */
1748 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1749 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1750 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1751 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1752 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1753 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1754 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1755 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1756 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
/* Test Case 13 vectors: K13 is a 32-byte key left zero-initialised (static
 * storage, no initialiser); T13 is the expected 16-byte tag. */
1759 static const u8 K13[32],
1764 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
/* Test Case 14 vectors: P14 is a 16-byte zero-initialised plaintext, C14
 * the expected ciphertext and T14 the expected 16-byte tag. */
1769 static const u8 P14[16],
1771 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1772 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
/* Test Case 15 vectors: 32-byte key K15, 64-byte plaintext P15, 12-byte
 * IV15, expected 64-byte ciphertext C15 and expected 16-byte tag T15. */
1776 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1777 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1778 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1779 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1780 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1781 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1782 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1783 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1784 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1785 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1786 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1787 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
/* Test Case 16 vectors: 60-byte plaintext P16, 20-byte additional
 * authenticated data A16, expected 60-byte ciphertext C16 and expected
 * 16-byte tag T16. */
1792 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1793 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1794 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1795 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1796 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1797 0xab,0xad,0xda,0xd2},
1798 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1799 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1800 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1801 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1802 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
/* Test Case 17 vectors: 8-byte IV17, expected 60-byte ciphertext C17 and
 * expected 16-byte tag T17. */
1808 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1809 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1810 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1811 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1812 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1813 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
/* Test Case 18 vectors: 60-byte IV18, expected 60-byte ciphertext C18 and
 * expected 16-byte tag T18. */
1819 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1820 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1821 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1822 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1823 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1824 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1825 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1826 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1827 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
/* Test Case 19 vectors: 128-byte additional authenticated data A19 and
 * expected 16-byte tag T19. */
1834 static const u8 A19[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1835 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1836 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1837 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
1838 0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1839 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1840 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1841 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1842 T19[]= {0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92};
/* Test Case 20 vectors: 64-byte IV20 whose first four bytes are 0xff and
 * whose remaining bytes are implicitly zero, expected 288-byte ciphertext
 * C20 and expected 16-byte tag T20. */
1847 static const u8 IV20[64]={0xff,0xff,0xff,0xff}, /* this results in 0xff in counter LSB */
1849 C20[]= {0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
1850 0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
1851 0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
1852 0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
1853 0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
1854 0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
1855 0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
1856 0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
1857 0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
1858 0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
1859 0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
1860 0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
1861 0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
1862 0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
1863 0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
1864 0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
1865 0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
1866 0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c},
1867 T20[]= {0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f};
/*
 * TEST_CASE(n): known-answer test number n, driven by the vectors named
 * K##n, IV##n, A##n, P##n, C##n and T##n.  The invoking scope must provide
 * `key`, `ctx` and the failure counter `ret`.
 *
 * Encrypt pass: schedule the AES key (sizeof(K##n)*8 bits), initialise the
 * context, set the IV, feed the AAD if A##n is non-null, encrypt P##n into
 * a local buffer, then require CRYPTO_gcm128_finish() to return 0 for the
 * 16-byte tag T##n and the buffer to equal C##n.
 *
 * Decrypt pass: set the IV again on the same context, replay the AAD,
 * decrypt C##n, then require the tag check to pass and the buffer to
 * equal P##n.
 *
 * Each failing pass increments ret and prints a message.
 */
1869 #define TEST_CASE(n) do { \
1870 u8 out[sizeof(P##n)]; \
1871 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
1872 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
1873 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1874 memset(out,0,sizeof(out)); \
1875 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1876 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
1877 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1878 (C##n && memcmp(out,C##n,sizeof(out)))) \
1879 ret++, printf ("encrypt test#%d failed.\n",n); \
1880 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1881 memset(out,0,sizeof(out)); \
1882 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1883 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
1884 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1885 (P##n && memcmp(out,P##n,sizeof(out)))) \
1886 ret++, printf ("decrypt test#%d failed.\n",n); \
1916 #ifdef OPENSSL_CPUID_OBJ
1918 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1919 union { u64 u; u8 c[1024]; } buf;
1922 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1923 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1924 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1926 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1927 start = OPENSSL_rdtsc();
1928 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1929 gcm_t = OPENSSL_rdtsc() - start;
1931 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1932 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1933 (block128_f)AES_encrypt);
1934 start = OPENSSL_rdtsc();
1935 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1936 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1937 (block128_f)AES_encrypt);
1938 ctr_t = OPENSSL_rdtsc() - start;
1940 printf("%.2f-%.2f=%.2f\n",
1941 gcm_t/(double)sizeof(buf),
1942 ctr_t/(double)sizeof(buf),
1943 (gcm_t-ctr_t)/(double)sizeof(buf));
1946 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1947 const u8 *inp,size_t len) = ctx.ghash;
1949 GHASH((&ctx),buf.c,sizeof(buf));
1950 start = OPENSSL_rdtsc();
1951 for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
1952 gcm_t = OPENSSL_rdtsc() - start;
1953 printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);