1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
50 #define OPENSSL_FIPSAPI
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
/*
 * Alternative GETU32/PUTU32 definitions, used only when both BSWAP4 and
 * STRICT_ALIGNMENT are defined.  GETU32(p) reads a u32 through a cast
 * pointer and byte-swaps it with BSWAP4; PUTU32(p,v) stores the
 * BSWAP4-swapped value through a cast pointer.  PUTU32 expands to a bare,
 * unparenthesised assignment, so it is only usable as a full statement.
 * NOTE(review): numbered lines are missing between the ones shown here, so
 * the rest of this conditional section is not visible in this listing.
 */
63 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64 /* redefine, because alignment is ensured */
66 #define GETU32(p) BSWAP4(*(const u32 *)(p))
68 #define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
/*
 * PACK(s) moves a constant into the 16 most significant bits of a size_t,
 * whatever the width of size_t is.
 */
71 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * REDUCE1BIT(V) shifts the 128-bit value V (V.hi:V.lo) right by one bit
 * and, when the bit shifted out of V.lo was set, XORs 0xe1 into the top
 * byte of V.hi.  T is built as a mask (0 minus the low bit), so the
 * conditional XOR needs no data-dependent branch.  Two variants of the same
 * computation are visible, chosen on sizeof(size_t)==8; the second one
 * builds the constant in 32 bits and shifts it up by 32.
 * NOTE(review): the lines that separate and close the two variants are
 * missing from this listing.
 */
72 #define REDUCE1BIT(V) do { \
73 if (sizeof(size_t)==8) { \
74 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75 V.lo = (V.hi<<63)|(V.lo>>1); \
76 V.hi = (V.hi>>1 )^T; \
79 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80 V.lo = (V.hi<<63)|(V.lo>>1); \
81 V.hi = (V.hi>>1 )^((u64)T<<32); \
86 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87 * never be set to 8. 8 is effectively reserved for testing purposes.
88 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90 * whole spectrum of possible table driven implementations. Why? In
91 * non-"Shoup's" case memory access pattern is segmented in such manner,
92 * that it's trivial to see that cache timing information can reveal
93 * a fair portion of the intermediate hash value. Given that ciphertext is
94 * always available to an attacker, it is possible for them to attempt to
95 * deduce the secret parameter H and, if successful, tamper with messages
96 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97 * not as trivial, but there is no reason to believe that it's resistant
98 * to cache-timing attack. And the thing about "8-bit" implementation is
99 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100 * key + 1KB shared. Well, on pros side it should be twice as fast as
101 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102 * was observed to run ~75% faster, closer to 100% for commercial
103 * compilers... Yet "4-bit" procedure is preferred, because it's
104 * believed to provide better security-performance balance and adequate
105 * all-round performance. "All-round" refers to things like:
107 * - shorter setup time effectively improves overall timing for
108 * handling short messages;
109 * - larger table allocation can become unbearable because of VM
110 * subsystem penalties (for example on Windows large enough free
111 * results in VM working set trimming, meaning that consequent
112 * malloc would immediately incur working set expansion);
113 * - larger table has larger cache footprint, which can affect
114 * performance of other code paths (not necessarily even from same
115 * thread in Hyper-Threading world);
117 * Value of 1 is not appropriate for performance reasons.
/*
 * Fills the 256-entry table Htable from the hash key H for use by
 * gcm_gmult_8bit.
 *
 * Visible steps: Htable[128] is seeded from V and a first loop walks i
 * through 64, 32, ..., 1; a second loop then derives every remaining slot
 * by XOR of two already-known entries.
 * NOTE(review): the declarations, the initialisation of V and the body of
 * the first loop are missing from this listing; presumably that loop
 * stores the successive single-bit reductions of V into Htable[i].
 */
121 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
131 for (Htable[128]=V, i=64; i>0; i>>=1) {
/* For each power of two i, Htable[i+j] = Htable[i] ^ Htable[j], 0<j<i. */
136 for (i=2; i<256; i<<=1) {
137 u128 *Hi = Htable+i, H0 = *Hi;
138 for (j=1; j<i; ++j) {
139 Hi[j].hi = H0.hi^Htable[j].hi;
140 Hi[j].lo = H0.lo^Htable[j].lo;
/*
 * Multiplies the 16-byte value Xi in place using the 256-entry table
 * Htable, consuming Xi one byte at a time.
 *
 * Visible steps: xi starts at the last byte of Xi (offset 15) and the loop
 * stops once xi has reached the first byte.  For each byte the entry
 * Htable[n] is XORed into the accumulator Z; between bytes Z is shifted
 * right by 8 bits and the byte that dropped out of Z.lo selects a
 * correction from rem_8bit that is XORed into the top of Z.hi (directly
 * when size_t is 8 bytes wide, shifted up by 32 otherwise).  Z is finally
 * written back to Xi, with a byte swap (BSWAP8 or PUTU32) on the
 * little-endian path.
 * NOTE(review): the declarations of Z, n and rem, the loop header and the
 * preprocessor lines around the store are missing from this listing.
 */
145 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
148 const u8 *xi = (const u8 *)Xi+15;
150 const union { long one; char little; } is_endian = {1};
/* Correction constants, one per dropped byte, kept in the top 16 bits. */
151 static const size_t rem_8bit[256] = {
152 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
153 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
154 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
155 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
156 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
157 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
158 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
159 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
160 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
161 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
162 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
163 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
164 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
165 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
166 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
167 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
168 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
169 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
170 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
171 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
172 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
173 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
174 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
175 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
176 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
177 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
178 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
179 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
180 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
181 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
182 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
183 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
184 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
185 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
186 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
187 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
188 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
189 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
190 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
191 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
192 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
193 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
194 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
195 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
196 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
197 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
198 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
199 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
200 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
201 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
202 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
203 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
204 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
205 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
206 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
207 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
208 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
209 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
210 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
211 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
212 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
213 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
214 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
215 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
/* Accumulate the table entry selected by the current byte. */
218 Z.hi ^= Htable[n].hi;
219 Z.lo ^= Htable[n].lo;
/* Stop after the first byte of Xi has been consumed. */
221 if ((u8 *)Xi==xi) break;
/* Shift Z right by one byte and fold the dropped byte back in. */
225 rem = (size_t)Z.lo&0xff;
226 Z.lo = (Z.hi<<56)|(Z.lo>>8);
228 if (sizeof(size_t)==8)
229 Z.hi ^= rem_8bit[rem];
231 Z.hi ^= (u64)rem_8bit[rem]<<32;
/* Write Z back into Xi. */
234 if (is_endian.little) {
236 Xi[0] = BSWAP8(Z.hi);
237 Xi[1] = BSWAP8(Z.lo);
241 v = (u32)(Z.hi>>32); PUTU32(p,v);
242 v = (u32)(Z.hi); PUTU32(p+4,v);
243 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
244 v = (u32)(Z.lo); PUTU32(p+12,v);
252 #define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
/*
 * Fills the 16-entry table Htable from the hash key H for the 4-bit
 * multiplication routines.
 *
 * Two construction strategies are visible.  With OPENSSL_SMALL_FOOTPRINT
 * the table is built by loops: Htable[8] is seeded from V while i walks
 * 4, 2, 1, then each Htable[i+j] is formed as Htable[i] ^ Htable[j].
 * Otherwise the same XOR combinations are written out one entry at a time.
 * When GHASH_ASM is defined on ARM the entries are additionally rewritten
 * into the dword order expected by the assembler.
 * NOTE(review): the statements that set V between the unrolled groups, the
 * loop bodies and the matching preprocessor lines are missing from this
 * listing.
 */
256 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
259 #if defined(OPENSSL_SMALL_FOOTPRINT)
268 #if defined(OPENSSL_SMALL_FOOTPRINT)
269 for (Htable[8]=V, i=4; i>0; i>>=1) {
274 for (i=2; i<16; i<<=1) {
277 for (V=*Hi, j=1; j<i; ++j) {
278 Hi[j].hi = V.hi^Htable[j].hi;
279 Hi[j].lo = V.lo^Htable[j].lo;
/* Unrolled form: each entry is V XOR a lower-numbered entry. */
290 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
292 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
293 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
294 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
296 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
297 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
298 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
299 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
300 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
301 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
302 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
304 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
306 * ARM assembler expects specific dword order in Htable.
310 const union { long one; char little; } is_endian = {1};
312 if (is_endian.little)
/* Swap hi and lo, rotating each 64-bit half by 32 bits. */
321 Htable[j].hi = V.lo<<32|V.lo>>32;
322 Htable[j].lo = V.hi<<32|V.hi>>32;
/*
 * Correction constants for the 4-bit routines, one per possible value of
 * the nibble shifted out of Z.lo (rem = Z.lo & 0xf).  PACK keeps each
 * constant in the top 16 bits of a size_t; users XOR the entry into Z.hi
 * directly when size_t is 8 bytes wide and shifted left by 32 otherwise.
 */
329 static const size_t rem_4bit[16] = {
330 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
331 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
332 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
333 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
/*
 * Multiplies the 16-byte value Xi in place using the 16-entry table Htable,
 * consuming Xi four bits at a time starting from its last byte.
 *
 * Visible steps: the accumulator Z is loaded from Htable[nlo] for byte 15;
 * Z is then repeatedly shifted right by 4 bits, the dropped nibble selects
 * a correction from rem_4bit that is XORed into the top of Z.hi, and the
 * next table entry (Htable[nhi], then Htable[nlo] of the next byte
 * Xi[cnt]) is XORed in.  Z is finally written back to Xi, with a byte swap
 * (BSWAP8 or PUTU32) on the little-endian path.
 * NOTE(review): the declarations of Z and cnt, the derivation of nhi/nlo
 * from each byte, the loop header and its exit test are missing from this
 * listing; presumably nlo/nhi are the low and high nibble indices.
 */
335 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
339 size_t rem, nlo, nhi;
340 const union { long one; char little; } is_endian = {1};
342 nlo = ((const u8 *)Xi)[15];
346 Z.hi = Htable[nlo].hi;
347 Z.lo = Htable[nlo].lo;
/* Shift right by one nibble, reduce, then add the nhi entry. */
350 rem = (size_t)Z.lo&0xf;
351 Z.lo = (Z.hi<<60)|(Z.lo>>4);
353 if (sizeof(size_t)==8)
354 Z.hi ^= rem_4bit[rem];
356 Z.hi ^= (u64)rem_4bit[rem]<<32;
358 Z.hi ^= Htable[nhi].hi;
359 Z.lo ^= Htable[nhi].lo;
/* Fetch the next byte of Xi and repeat for its nlo entry. */
363 nlo = ((const u8 *)Xi)[cnt];
367 rem = (size_t)Z.lo&0xf;
368 Z.lo = (Z.hi<<60)|(Z.lo>>4);
370 if (sizeof(size_t)==8)
371 Z.hi ^= rem_4bit[rem];
373 Z.hi ^= (u64)rem_4bit[rem]<<32;
375 Z.hi ^= Htable[nlo].hi;
376 Z.lo ^= Htable[nlo].lo;
/* Write Z back into Xi. */
379 if (is_endian.little) {
381 Xi[0] = BSWAP8(Z.hi);
382 Xi[1] = BSWAP8(Z.lo);
386 v = (u32)(Z.hi>>32); PUTU32(p,v);
387 v = (u32)(Z.hi); PUTU32(p+4,v);
388 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
389 v = (u32)(Z.lo); PUTU32(p+12,v);
398 #if !defined(OPENSSL_SMALL_FOOTPRINT)
400 * Streamed gcm_gmult_4bit, see CRYPTO_gcm128_[en|de]crypt for
401 * details... Compiler-generated code doesn't seem to give any
402 * performance improvement, at least not on x86[_64]. It's here
403 * mostly as reference and a placeholder for possible future
404 * non-trivial optimization[s]...
/*
 * Streamed form of the 4-bit multiplication: folds len bytes from inp into
 * the 16-byte state Xi using the 16-entry table Htable.
 *
 * The closing loop condition advances inp by 16 and decrements len by 16
 * until len reaches zero, so len is expected to be a non-zero multiple
 * of 16.
 *
 * Two bodies are visible.  The first mirrors gcm_gmult_4bit (nibble-wise
 * shift, rem_4bit correction, table XOR).  The second first derives two
 * helper tables from Htable (Hshr4: entries shifted right by 4 bits;
 * Hshl4: the low nibble of each entry moved to the high nibble of a byte)
 * and then handles a whole byte per step, using a 16-bit rem_8bit table
 * whose entries are shifted up by 48 bits before being XORed into Z.hi.
 * NOTE(review): the preprocessor lines choosing between the two bodies,
 * the declarations of Z and cnt, the mixing of inp into the nibble indices
 * and the loop headers are missing from this listing.
 */
406 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
407 const u8 *inp,size_t len)
411 size_t rem, nlo, nhi;
412 const union { long one; char little; } is_endian = {1};
417 nlo = ((const u8 *)Xi)[15];
422 Z.hi = Htable[nlo].hi;
423 Z.lo = Htable[nlo].lo;
426 rem = (size_t)Z.lo&0xf;
427 Z.lo = (Z.hi<<60)|(Z.lo>>4);
429 if (sizeof(size_t)==8)
430 Z.hi ^= rem_4bit[rem];
432 Z.hi ^= (u64)rem_4bit[rem]<<32;
434 Z.hi ^= Htable[nhi].hi;
435 Z.lo ^= Htable[nhi].lo;
439 nlo = ((const u8 *)Xi)[cnt];
444 rem = (size_t)Z.lo&0xf;
445 Z.lo = (Z.hi<<60)|(Z.lo>>4);
447 if (sizeof(size_t)==8)
448 Z.hi ^= rem_4bit[rem];
450 Z.hi ^= (u64)rem_4bit[rem]<<32;
452 Z.hi ^= Htable[nlo].hi;
453 Z.lo ^= Htable[nlo].lo;
457 * Extra 256+16 bytes per-key plus 512 bytes shared tables
458 * [should] give ~50% improvement... One could have PACK()-ed
459 * the rem_8bit even here, but the priority is to minimize
462 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
463 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
464 static const unsigned short rem_8bit[256] = {
465 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
466 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
467 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
468 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
469 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
470 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
471 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
472 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
473 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
474 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
475 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
476 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
477 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
478 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
479 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
480 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
481 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
482 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
483 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
484 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
485 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
486 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
487 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
488 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
489 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
490 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
491 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
492 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
493 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
494 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
495 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
496 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
498 * This pre-processing phase slows down procedure by approximately
499 * same time as it makes each loop spin faster. In other words
500 * single block performance is approximately same as straightforward
501 * "4-bit" implementation, and then it goes only faster...
503 for (cnt=0; cnt<16; ++cnt) {
504 Z.hi = Htable[cnt].hi;
505 Z.lo = Htable[cnt].lo;
506 Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
507 Hshr4[cnt].hi = (Z.hi>>4);
508 Hshl4[cnt] = (u8)(Z.lo<<4);
512 for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
513 nlo = ((const u8 *)Xi)[cnt];
518 Z.hi ^= Htable[nlo].hi;
519 Z.lo ^= Htable[nlo].lo;
521 rem = (size_t)Z.lo&0xff;
523 Z.lo = (Z.hi<<56)|(Z.lo>>8);
526 Z.hi ^= Hshr4[nhi].hi;
527 Z.lo ^= Hshr4[nhi].lo;
528 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
531 nlo = ((const u8 *)Xi)[0];
536 Z.hi ^= Htable[nlo].hi;
537 Z.lo ^= Htable[nlo].lo;
539 rem = (size_t)Z.lo&0xf;
541 Z.lo = (Z.hi<<60)|(Z.lo>>4);
544 Z.hi ^= Htable[nhi].hi;
545 Z.lo ^= Htable[nhi].lo;
546 Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
549 if (is_endian.little) {
551 Xi[0] = BSWAP8(Z.hi);
552 Xi[1] = BSWAP8(Z.lo);
556 v = (u32)(Z.hi>>32); PUTU32(p,v);
557 v = (u32)(Z.hi); PUTU32(p+4,v);
558 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
559 v = (u32)(Z.lo); PUTU32(p+12,v);
566 } while (inp+=16, len-=16);
/*
 * Prototypes for gcm_gmult_4bit and gcm_ghash_4bit, followed by the
 * convenience macros built on them: GCM_MUL multiplies the ctx->Xi state
 * using ctx->Htable, and GHASH (only defined when GHASH_ASM is defined or
 * OPENSSL_SMALL_FOOTPRINT is not) hashes a buffer into that state.
 * NOTE(review): the conditional lines enclosing these prototypes are
 * missing from this listing; presumably they apply when the routines are
 * supplied externally rather than by the C code in this file - confirm.
 */
570 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
571 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
574 #define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
575 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
576 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
577 /* GHASH_CHUNK is a "stride parameter" intended to mitigate the cache
578 * thrashing effect. In other words, the idea is to hash data while it is
579 * still in L1 cache after the encryption pass... */
580 #define GHASH_CHUNK (3*1024)
583 #else /* TABLE_BITS */
/*
 * Table-free multiplication of the 16-byte value Xi by H, processing Xi
 * one bit at a time.
 *
 * Visible steps: V is loaded from H (already in host byte order); Xi is
 * read as 16/sizeof(long) words of type long, each converted from
 * big-endian layout on little-endian hosts (BSWAP8 or a pair of GETU32
 * reads); for every bit of the word, taken from the top, a value M is
 * derived from the word's top bit.  The result Z is finally written back
 * to Xi, with a byte swap (BSWAP8 or PUTU32) on the little-endian path.
 * NOTE(review): the declarations, the use of M and the per-bit update of
 * V and Z are missing from this listing; presumably M is an all-ones or
 * all-zero mask that gates XORing V into Z before REDUCE1BIT(V).
 */
585 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
590 const long *xi = (const long *)Xi;
591 const union { long one; char little; } is_endian = {1};
593 V.hi = H[0]; /* H is in host byte order, no byte swapping */
596 for (j=0; j<16/sizeof(long); ++j) {
597 if (is_endian.little) {
598 if (sizeof(long)==8) {
600 X = (long)(BSWAP8(xi[j]));
602 const u8 *p = (const u8 *)(xi+j);
603 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
607 const u8 *p = (const u8 *)(xi+j);
/* Walk the bits of X from the most significant end. */
614 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
615 u64 M = (u64)(X>>(8*sizeof(long)-1));
/* Write Z back into Xi. */
623 if (is_endian.little) {
625 Xi[0] = BSWAP8(Z.hi);
626 Xi[1] = BSWAP8(Z.lo);
630 v = (u32)(Z.hi>>32); PUTU32(p,v);
631 v = (u32)(Z.hi); PUTU32(p+4,v);
632 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
633 v = (u32)(Z.lo); PUTU32(p+12,v);
641 #define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/*
 * Declarations for optional CPU-specific GHASH back ends.  The section is
 * active only for TABLE_BITS==4 with GHASH_ASM or OPENSSL_CPUID_OBJ
 * defined.  Each supported architecture defines a GHASH_ASM_* marker and
 * GCM_FUNCREF_4BIT, and declares init/gmult/ghash triples:
 *   - x86 and x86_64 (unless I386_ONLY): *_clmul and *_avx, where the avx
 *     names are aliased to the clmul ones on 32-bit x86; 32-bit x86
 *     additionally declares the *_4bit_mmx and *_4bit_x86 routines;
 *   - ARM/AArch64 with __ARM_MAX_ARCH__>=7: *_neon and *_v8, with
 *     PMULL_CAPABLE and NEON_CAPABLE testing OPENSSL_armcap_P;
 *   - SPARC: *_vis3;
 *   - PowerPC (needs OPENSSL_CPUID_OBJ): *_p8.
 * NOTE(review): the else/endif lines of this section are missing from
 * this listing, so the exact nesting cannot be confirmed from here.
 */
645 #if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
646 # if !defined(I386_ONLY) && \
647 (defined(__i386) || defined(__i386__) || \
648 defined(__x86_64) || defined(__x86_64__) || \
649 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
650 # define GHASH_ASM_X86_OR_64
651 # define GCM_FUNCREF_4BIT
652 extern unsigned int OPENSSL_ia32cap_P[2];
654 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
655 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
656 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
658 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
659 # define gcm_init_avx gcm_init_clmul
660 # define gcm_gmult_avx gcm_gmult_clmul
661 # define gcm_ghash_avx gcm_ghash_clmul
663 void gcm_init_avx(u128 Htable[16],const u64 Xi[2]);
664 void gcm_gmult_avx(u64 Xi[2],const u128 Htable[16]);
665 void gcm_ghash_avx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
668 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
669 # define GHASH_ASM_X86
670 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
671 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
673 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
674 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
676 # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
677 # include "arm_arch.h"
678 # if __ARM_MAX_ARCH__>=7
679 # define GHASH_ASM_ARM
680 # define GCM_FUNCREF_4BIT
681 # define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
682 # if defined(__arm__) || defined(__arm)
683 # define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
685 void gcm_init_neon(u128 Htable[16],const u64 Xi[2]);
686 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
687 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
688 void gcm_init_v8(u128 Htable[16],const u64 Xi[2]);
689 void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]);
690 void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
692 # elif defined(__sparc__) || defined(__sparc)
693 # include "sparc_arch.h"
694 # define GHASH_ASM_SPARC
695 # define GCM_FUNCREF_4BIT
696 extern unsigned int OPENSSL_sparcv9cap_P[];
697 void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
698 void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
699 void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
700 # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
701 # include "ppc_arch.h"
702 # define GHASH_ASM_PPC
703 # define GCM_FUNCREF_4BIT
704 void gcm_init_p8(u128 Htable[16],const u64 Xi[2]);
705 void gcm_gmult_p8(u64 Xi[2],const u128 Htable[16]);
706 void gcm_ghash_p8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
710 #ifdef GCM_FUNCREF_4BIT
712 # define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
715 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
/*
 * Initialises a GCM context for the given block cipher and key.
 *
 * Visible steps:
 *   1. the whole context is zeroed;
 *   2. the block function is applied in place to ctx->H.c (all zero after
 *      step 1), producing the hash key H;
 *   3. on little-endian hosts H is converted to host byte order (BSWAP8,
 *      or rebuilt from four GETU32 reads);
 *   4. the multiplication table is built and, where function pointers are
 *      in use, ctx->gmult / ctx->ghash are set to the selected back end:
 *      gcm_init_8bit for the 8-bit configuration; on x86/x86_64 the
 *      clmul or avx routines when the FXSR and PCLMULQDQ capability bits
 *      are set (avx additionally needs AVX+MOVBE), otherwise the 4-bit
 *      table with the mmx, x86 or generic routines; on ARM the v8 or neon
 *      routines, else the 4-bit table; on SPARC vis3 when SPARCV9_VIS3 is
 *      set; on PowerPC p8 when PPC_CRYPTO207 is set; plain gcm_init_4bit
 *      otherwise.
 * NOTE(review): the stores of ctx->block / ctx->key and most of the
 * preprocessor and brace lines are missing from this listing.
 */
719 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
721 const union { long one; char little; } is_endian = {1};
723 memset(ctx,0,sizeof(*ctx));
727 (*block)(ctx->H.c,ctx->H.c,key);
729 if (is_endian.little) {
730 /* H is stored in host byte order */
732 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
733 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
737 hi = (u64)GETU32(p) <<32|GETU32(p+4);
738 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
745 gcm_init_8bit(ctx->Htable,ctx->H.u);
747 # if defined(GHASH_ASM_X86_OR_64)
748 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
749 if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
750 OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
751 if (((OPENSSL_ia32cap_P[1]>>22)&0x41)==0x41) { /* AVX+MOVBE */
752 gcm_init_avx(ctx->Htable,ctx->H.u);
753 ctx->gmult = gcm_gmult_avx;
754 ctx->ghash = gcm_ghash_avx;
756 gcm_init_clmul(ctx->Htable,ctx->H.u);
757 ctx->gmult = gcm_gmult_clmul;
758 ctx->ghash = gcm_ghash_clmul;
763 gcm_init_4bit(ctx->Htable,ctx->H.u);
764 # if defined(GHASH_ASM_X86) /* x86 only */
765 # if defined(OPENSSL_IA32_SSE2)
766 if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */
768 if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
770 ctx->gmult = gcm_gmult_4bit_mmx;
771 ctx->ghash = gcm_ghash_4bit_mmx;
773 ctx->gmult = gcm_gmult_4bit_x86;
774 ctx->ghash = gcm_ghash_4bit_x86;
777 ctx->gmult = gcm_gmult_4bit;
778 ctx->ghash = gcm_ghash_4bit;
780 # elif defined(GHASH_ASM_ARM)
781 # ifdef PMULL_CAPABLE
783 gcm_init_v8(ctx->Htable,ctx->H.u);
784 ctx->gmult = gcm_gmult_v8;
785 ctx->ghash = gcm_ghash_v8;
790 gcm_init_neon(ctx->Htable,ctx->H.u);
791 ctx->gmult = gcm_gmult_neon;
792 ctx->ghash = gcm_ghash_neon;
796 gcm_init_4bit(ctx->Htable,ctx->H.u);
797 ctx->gmult = gcm_gmult_4bit;
798 ctx->ghash = gcm_ghash_4bit;
800 # elif defined(GHASH_ASM_SPARC)
801 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
802 gcm_init_vis3(ctx->Htable,ctx->H.u);
803 ctx->gmult = gcm_gmult_vis3;
804 ctx->ghash = gcm_ghash_vis3;
806 gcm_init_4bit(ctx->Htable,ctx->H.u);
807 ctx->gmult = gcm_gmult_4bit;
808 ctx->ghash = gcm_ghash_4bit;
810 # elif defined(GHASH_ASM_PPC)
811 if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
812 gcm_init_p8(ctx->Htable,ctx->H.u);
813 ctx->gmult = gcm_gmult_p8;
814 ctx->ghash = gcm_ghash_p8;
816 gcm_init_4bit(ctx->Htable,ctx->H.u);
817 ctx->gmult = gcm_gmult_4bit;
818 ctx->ghash = gcm_ghash_4bit;
821 gcm_init_4bit(ctx->Htable,ctx->H.u);
/*
 * Starts a new message on an initialised context by installing the IV.
 *
 * Visible steps: the AAD and message length counters are reset; the IV is
 * either copied straight into the first 12 bytes of ctx->Yi, or XORed into
 * ctx->Yi in 16-byte pieces plus a shorter tail, after which the value
 * len0 is XORed into the last 8 bytes of Yi in big-endian order; the
 * 32-bit counter is then read from the last four bytes of Yi, Yi is
 * encrypted into ctx->EK0 with the context's block function, and a counter
 * value is stored back into the last four bytes of Yi.
 * NOTE(review): the test choosing between the two IV paths (presumably
 * len==12), the GCM_MUL calls between pieces, the definition of len0 and
 * the counter increment are missing from this listing.
 */
826 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
828 const union { long one; char little; } is_endian = {1};
830 #ifdef GCM_FUNCREF_4BIT
831 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
838 ctx->len.u[0] = 0; /* AAD length */
839 ctx->len.u[1] = 0; /* message length */
844 memcpy(ctx->Yi.c,iv,12);
853 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
859 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
863 if (is_endian.little) {
865 ctx->Yi.u[1] ^= BSWAP8(len0);
867 ctx->Yi.c[8] ^= (u8)(len0>>56);
868 ctx->Yi.c[9] ^= (u8)(len0>>48);
869 ctx->Yi.c[10] ^= (u8)(len0>>40);
870 ctx->Yi.c[11] ^= (u8)(len0>>32);
871 ctx->Yi.c[12] ^= (u8)(len0>>24);
872 ctx->Yi.c[13] ^= (u8)(len0>>16);
873 ctx->Yi.c[14] ^= (u8)(len0>>8);
874 ctx->Yi.c[15] ^= (u8)(len0);
878 ctx->Yi.u[1] ^= len0;
882 if (is_endian.little)
884 ctr = BSWAP4(ctx->Yi.d[3]);
886 ctr = GETU32(ctx->Yi.c+12);
892 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
894 if (is_endian.little)
896 ctx->Yi.d[3] = BSWAP4(ctr);
898 PUTU32(ctx->Yi.c+12,ctr);
/*
 * Feeds additional authenticated data (AAD) into the hash state ctx->Xi.
 *
 * Returns -2 when message data has already been processed, i.e. when
 * ctx->len.u[1] is non-zero.  The running AAD length is checked against a
 * limit of 2^61 and against wrap-around before being stored in
 * ctx->len.u[0].
 * Visible processing: bytes are XORed into ctx->Xi.c to complete a
 * partially filled block (multiplying once the block offset n returns to
 * zero), then whole 16-byte blocks are handled, then the remaining tail
 * bytes are XORed in and their count is kept in n.
 * NOTE(review): the update of alen, the other return statements and the
 * loop headers are missing from this listing.
 */
904 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
908 u64 alen = ctx->len.u[0];
909 #ifdef GCM_FUNCREF_4BIT
910 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
912 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
913 const u8 *inp,size_t len) = ctx->ghash;
917 if (ctx->len.u[1]) return -2;
920 if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
922 ctx->len.u[0] = alen;
927 ctx->Xi.c[n] ^= *(aad++);
931 if (n==0) GCM_MUL(ctx,Xi);
939 if ((i = (len&(size_t)-16))) {
946 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
953 n = (unsigned int)len;
954 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
/*
 * Encrypts data from in to out in counter mode with the context's block
 * function and folds the produced ciphertext into the hash state ctx->Xi.
 *
 * Visible steps: the running message length is checked against a limit of
 * 2^36-32 bytes and against wrap-around, then stored in ctx->len.u[1]; the
 * 32-bit counter is read big-endian from the last four bytes of ctx->Yi;
 * for every block, Yi is encrypted into ctx->EKi, a counter value is
 * written back into Yi and the input is XORed with EKi.
 * Unless OPENSSL_SMALL_FOOTPRINT is defined, a fast path works on size_t
 * words (with an alignment test on in/out under STRICT_ALIGNMENT); when
 * GHASH and GHASH_CHUNK are defined, ciphertext is hashed GHASH_CHUNK
 * bytes at a time right after it has been produced.  A byte-at-a-time
 * loop is also visible at the end.
 * NOTE(review): the len parameter line, the update of mlen, the counter
 * increments, the return statements and most loop and preprocessor lines
 * are missing from this listing.
 */
961 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
962 const unsigned char *in, unsigned char *out,
965 const union { long one; char little; } is_endian = {1};
968 u64 mlen = ctx->len.u[1];
969 block128_f block = ctx->block;
970 void *key = ctx->key;
971 #ifdef GCM_FUNCREF_4BIT
972 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
974 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
975 const u8 *inp,size_t len) = ctx->ghash;
980 n = (unsigned int)mlen%16; /* alternative to ctx->mres */
983 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
985 ctx->len.u[1] = mlen;
988 /* First call to encrypt finalizes GHASH(AAD) */
993 if (is_endian.little)
995 ctr = BSWAP4(ctx->Yi.d[3]);
997 ctr = GETU32(ctx->Yi.c+12);
1003 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1004 if (16%sizeof(size_t) == 0) { /* always true actually */
1008 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1012 if (n==0) GCM_MUL(ctx,Xi);
1018 #if defined(STRICT_ALIGNMENT)
1019 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1022 #if defined(GHASH) && defined(GHASH_CHUNK)
1023 while (len>=GHASH_CHUNK) {
1024 size_t j=GHASH_CHUNK;
1027 size_t *out_t=(size_t *)out;
1028 const size_t *in_t=(const size_t *)in;
1030 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1032 if (is_endian.little)
1034 ctx->Yi.d[3] = BSWAP4(ctr);
1036 PUTU32(ctx->Yi.c+12,ctr);
1040 for (i=0; i<16/sizeof(size_t); ++i)
1041 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
/* Hash the chunk of ciphertext that was just written. */
1046 GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
1049 if ((i = (len&(size_t)-16))) {
1053 size_t *out_t=(size_t *)out;
1054 const size_t *in_t=(const size_t *)in;
1056 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1058 if (is_endian.little)
1060 ctx->Yi.d[3] = BSWAP4(ctr);
1062 PUTU32(ctx->Yi.c+12,ctr);
1066 for (i=0; i<16/sizeof(size_t); ++i)
1067 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1076 size_t *out_t=(size_t *)out;
1077 const size_t *in_t=(const size_t *)in;
1079 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1081 if (is_endian.little)
1083 ctx->Yi.d[3] = BSWAP4(ctr);
1085 PUTU32(ctx->Yi.c+12,ctr);
1089 for (i=0; i<16/sizeof(size_t); ++i)
1091 out_t[i] = in_t[i]^ctx->EKi.t[i];
1099 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1101 if (is_endian.little)
1103 ctx->Yi.d[3] = BSWAP4(ctr);
1105 PUTU32(ctx->Yi.c+12,ctr);
1110 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1120 for (i=0;i<len;++i) {
1122 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1124 if (is_endian.little)
1126 ctx->Yi.d[3] = BSWAP4(ctr);
1128 PUTU32(ctx->Yi.c+12,ctr);
1133 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
/*
 * Decrypts data from in to out in counter mode with the context's block
 * function and folds the incoming ciphertext into the hash state ctx->Xi.
 *
 * Visible steps: the running message length is checked against a limit of
 * 2^36-32 bytes and against wrap-around, then stored in ctx->len.u[1]; the
 * 32-bit counter is read big-endian from the last four bytes of ctx->Yi;
 * for every block, Yi is encrypted into ctx->EKi, a counter value is
 * written back into Yi and the input is XORed with EKi.
 * Unless OPENSSL_SMALL_FOOTPRINT is defined, a fast path works on size_t
 * words (with an alignment test on in/out under STRICT_ALIGNMENT); when
 * GHASH and GHASH_CHUNK are defined, each GHASH_CHUNK bytes of input are
 * hashed before they are decrypted.  A byte-at-a-time loop is also
 * visible at the end.
 * NOTE(review): the len parameter line, the update of mlen, the counter
 * increments, the declarations of c, the return statements and most loop
 * and preprocessor lines are missing from this listing.
 */
1143 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1144 const unsigned char *in, unsigned char *out,
1147 const union { long one; char little; } is_endian = {1};
1148 unsigned int n, ctr;
1150 u64 mlen = ctx->len.u[1];
1151 block128_f block = ctx->block;
1152 void *key = ctx->key;
1153 #ifdef GCM_FUNCREF_4BIT
1154 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1156 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1157 const u8 *inp,size_t len) = ctx->ghash;
1162 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1164 ctx->len.u[1] = mlen;
1167 /* First call to decrypt finalizes GHASH(AAD) */
1172 if (is_endian.little)
1174 ctr = BSWAP4(ctx->Yi.d[3]);
1176 ctr = GETU32(ctx->Yi.c+12);
1182 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1183 if (16%sizeof(size_t) == 0) { /* always true actually */
1188 *(out++) = c^ctx->EKi.c[n];
1193 if (n==0) GCM_MUL (ctx,Xi);
1199 #if defined(STRICT_ALIGNMENT)
1200 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1203 #if defined(GHASH) && defined(GHASH_CHUNK)
1204 while (len>=GHASH_CHUNK) {
1205 size_t j=GHASH_CHUNK;
/* Hash the ciphertext chunk before it is decrypted. */
1207 GHASH(ctx,in,GHASH_CHUNK);
1209 size_t *out_t=(size_t *)out;
1210 const size_t *in_t=(const size_t *)in;
1212 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1214 if (is_endian.little)
1216 ctx->Yi.d[3] = BSWAP4(ctr);
1218 PUTU32(ctx->Yi.c+12,ctr);
1222 for (i=0; i<16/sizeof(size_t); ++i)
1223 out_t[i] = in_t[i]^ctx->EKi.t[i];
1230 if ((i = (len&(size_t)-16))) {
1233 size_t *out_t=(size_t *)out;
1234 const size_t *in_t=(const size_t *)in;
1236 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1238 if (is_endian.little)
1240 ctx->Yi.d[3] = BSWAP4(ctr);
1242 PUTU32(ctx->Yi.c+12,ctr);
1246 for (i=0; i<16/sizeof(size_t); ++i)
1247 out_t[i] = in_t[i]^ctx->EKi.t[i];
1255 size_t *out_t=(size_t *)out;
1256 const size_t *in_t=(const size_t *)in;
1258 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1260 if (is_endian.little)
1262 ctx->Yi.d[3] = BSWAP4(ctr);
1264 PUTU32(ctx->Yi.c+12,ctr);
/*
 * NOTE(review): this word-wise loop stores through the byte pointer out
 * (out[i]) although out_t/in_t were set up just above and the sibling
 * loops store through out_t[i]; it looks as if a word-sized value is
 * narrowed to a single byte here - confirm against the missing lines.
 */
1268 for (i=0; i<16/sizeof(size_t); ++i) {
1270 out[i] = c^ctx->EKi.t[i];
1280 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1282 if (is_endian.little)
1284 ctx->Yi.d[3] = BSWAP4(ctr);
1286 PUTU32(ctx->Yi.c+12,ctr);
1293 out[n] = c^ctx->EKi.c[n];
1303 for (i=0;i<len;++i) {
1306 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1308 if (is_endian.little)
1310 ctx->Yi.d[3] = BSWAP4(ctr);
1312 PUTU32(ctx->Yi.c+12,ctr);
1318 out[i] = c^ctx->EKi.c[n];
/*
 * Variant of the encrypt routine that delegates bulk counter-mode work to
 * the caller-supplied stream function, which is called with a block count
 * and the counter block ctx->Yi.c.
 *
 * Visible steps: the running message length is checked against a limit of
 * 2^36-32 bytes and against wrap-around, then stored in ctx->len.u[1]; the
 * 32-bit counter is read big-endian from the last four bytes of ctx->Yi;
 * bytes completing a partial block are XORed with ctx->EKi and folded into
 * ctx->Xi.  When GHASH is defined and OPENSSL_SMALL_FOOTPRINT is not,
 * data is processed GHASH_CHUNK bytes at a time: stream encrypts the
 * chunk, the counter is advanced by GHASH_CHUNK/16 and written back, and
 * the produced ciphertext is hashed.  Remaining whole blocks go through
 * stream in one call (counter advanced by j); a final partial block is
 * handled with ctx->block.
 * NOTE(review): the update of mlen, the definition of j, the pointer
 * advances, the return statements and most loop and preprocessor lines
 * are missing from this listing.
 */
1329 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1330 const unsigned char *in, unsigned char *out,
1331 size_t len, ctr128_f stream)
1333 const union { long one; char little; } is_endian = {1};
1334 unsigned int n, ctr;
1336 u64 mlen = ctx->len.u[1];
1337 void *key = ctx->key;
1338 #ifdef GCM_FUNCREF_4BIT
1339 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1341 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1342 const u8 *inp,size_t len) = ctx->ghash;
1347 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1349 ctx->len.u[1] = mlen;
1352 /* First call to encrypt finalizes GHASH(AAD) */
1357 if (is_endian.little)
1359 ctr = BSWAP4(ctx->Yi.d[3]);
1361 ctr = GETU32(ctx->Yi.c+12);
1369 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1373 if (n==0) GCM_MUL(ctx,Xi);
1379 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1380 while (len>=GHASH_CHUNK) {
1381 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1382 ctr += GHASH_CHUNK/16;
1383 if (is_endian.little)
1385 ctx->Yi.d[3] = BSWAP4(ctr);
1387 PUTU32(ctx->Yi.c+12,ctr);
1391 GHASH(ctx,out,GHASH_CHUNK);
1397 if ((i = (len&(size_t)-16))) {
1400 (*stream)(in,out,j,key,ctx->Yi.c);
1401 ctr += (unsigned int)j;
1402 if (is_endian.little)
1404 ctx->Yi.d[3] = BSWAP4(ctr);
1406 PUTU32(ctx->Yi.c+12,ctr);
1417 for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1424 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1426 if (is_endian.little)
1428 ctx->Yi.d[3] = BSWAP4(ctr);
1430 PUTU32(ctx->Yi.c+12,ctr);
1435 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
/*
 * GCM decryption variant that delegates whole 16-byte blocks to a
 * caller-supplied counter-mode routine.
 *
 *   ctx    - GCM state; this function uses Yi (counter block), EKi (encrypted
 *            counter block used as keystream), Xi (hash accumulator),
 *            len.u[1] (running message byte count), key and block.
 *   in/out - ciphertext input and plaintext output buffers of len bytes.
 *   stream - called as (*stream)(in,out,blocks,key,ctx->Yi.c).
 *
 * Unlike the encrypt direction, the hash is taken over the INPUT, and the
 * bulk path hashes before calling the stream routine.
 *
 * NOTE(review): the int result is presumably 0 on success and non-zero when
 * the length check below trips - confirm against the return statements.
 */
1444 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1445 const unsigned char *in, unsigned char *out,
1446 size_t len,ctr128_f stream)
/* Run-time endianness probe: .little reads as 1 on little-endian hosts. */
1448 const union { long one; char little; } is_endian = {1};
1449 unsigned int n, ctr;
1451 u64 mlen = ctx->len.u[1];
1452 void *key = ctx->key;
/*
 * Local copies of the context's multiply/hash function pointers.
 * NOTE(review): presumably the GCM_MUL and GHASH macros dispatch through
 * these names when GCM_FUNCREF_4BIT is defined - confirm.
 */
1453 #ifdef GCM_FUNCREF_4BIT
1454 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1456 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1457 const u8 *inp,size_t len) = ctx->ghash;
/*
 * Length guard: trips when the running total exceeds 2^36-32 bytes
 * (= 2^39-256 bits) or, with a 64-bit size_t, when the total is smaller than
 * len, i.e. the addition wrapped.
 * NOTE(review): assumes len has already been added to mlen at this point and
 * that the guarded statement is an error return - confirm.
 */
1462 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1464 ctx->len.u[1] = mlen;
1467 /* First call to decrypt finalizes GHASH(AAD) */
/*
 * Load the 32-bit counter from the last word of Yi into host order: a byte
 * swap of Yi.d[3] on little-endian hosts, a byte-wise GETU32 of Yi.c+12 as
 * the alternative form.
 */
1472 if (is_endian.little)
1474 ctr = BSWAP4(ctx->Yi.d[3]);
1476 ctr = GETU32(ctx->Yi.c+12);
/*
 * Byte-at-a-time path: emit c XOR keystream byte EKi.c[n].
 * NOTE(review): c presumably holds the ciphertext byte just read from in,
 * which is also folded into Xi - confirm.
 */
1485 *(out++) = c^ctx->EKi.c[n];
/* A completed 16-byte block (n back at 0) triggers the field multiply. */
1490 if (n==0) GCM_MUL (ctx,Xi);
/*
 * Bulk path: hash GHASH_CHUNK bytes of input first, then decrypt them with
 * the stream routine, advance the counter by the number of blocks and write
 * it back to Yi.
 */
1496 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1497 while (len>=GHASH_CHUNK) {
1498 GHASH(ctx,in,GHASH_CHUNK);
1499 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1500 ctr += GHASH_CHUNK/16;
1501 if (is_endian.little)
1503 ctx->Yi.d[3] = BSWAP4(ctr);
1505 PUTU32(ctx->Yi.c+12,ctr);
/*
 * Remaining whole blocks: i is len rounded down to a multiple of 16.
 * NOTE(review): j is presumably the block count i/16 - confirm.
 */
1514 if ((i = (len&(size_t)-16))) {
/* Fold one 16-byte block of input into Xi byte by byte. */
1522 for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1529 (*stream)(in,out,j,key,ctx->Yi.c);
1530 ctr += (unsigned int)j;
1531 if (is_endian.little)
1533 ctx->Yi.d[3] = BSWAP4(ctr);
1535 PUTU32(ctx->Yi.c+12,ctr);
/*
 * Trailing bytes: produce one more keystream block with the context's block
 * cipher, store the updated counter, then XOR byte by byte.
 */
1544 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1546 if (is_endian.little)
1548 ctx->Yi.d[3] = BSWAP4(ctr);
1550 PUTU32(ctx->Yi.c+12,ctr);
1557 out[n] = c^ctx->EKi.c[n];
/*
 * Completes the GCM tag computation in ctx->Xi and, when a tag is supplied,
 * compares it with the computed value.
 *
 *   ctx - GCM state; Xi ends up holding the computed tag.
 *   tag - expected tag to compare against, or NULL to skip the comparison.
 *
 * When tag is non-NULL and len <= sizeof(ctx->Xi), the result is that of
 * memcmp over the first len bytes, so 0 means the tag matched.
 * NOTE(review): the result for the other case is presumably an error value -
 * confirm against the remaining return statement.
 */
1566 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
/* Run-time endianness probe: .little reads as 1 on little-endian hosts. */
1569 const union { long one; char little; } is_endian = {1};
/*
 * Byte counts shifted left by 3, i.e. converted to bit counts. len.u[1] is
 * the message length maintained by the encrypt/decrypt routines.
 * NOTE(review): len.u[0] is presumably the AAD length - confirm.
 */
1570 u64 alen = ctx->len.u[0]<<3;
1571 u64 clen = ctx->len.u[1]<<3;
1572 #ifdef GCM_FUNCREF_4BIT
1573 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
/*
 * Pending partial message or AAD block.
 * NOTE(review): presumably flushed with a field multiply here - confirm.
 */
1576 if (ctx->mres || ctx->ares)
/*
 * On little-endian hosts both bit lengths are converted to big-endian byte
 * order, either with BSWAP8 or by storing them in ctx->len and re-reading
 * them with GETU32.
 * NOTE(review): p presumably points at ctx->len's bytes - confirm.
 */
1579 if (is_endian.little) {
1581 alen = BSWAP8(alen);
1582 clen = BSWAP8(clen);
1586 ctx->len.u[0] = alen;
1587 ctx->len.u[1] = clen;
1589 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1590 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
/* Fold the length block into the hash accumulator. */
1594 ctx->Xi.u[0] ^= alen;
1595 ctx->Xi.u[1] ^= clen;
/* Mask the accumulator with EK0 to form the tag. */
1598 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1599 ctx->Xi.u[1] ^= ctx->EK0.u[1];
/*
 * NOTE(review): memcmp is not guaranteed to run in constant time, so this
 * comparison may leak how many leading tag bytes matched. Consider a
 * constant-time comparison (e.g. CRYPTO_memcmp) for tag verification.
 */
1601 if (tag && len<=sizeof(ctx->Xi))
1602 return memcmp(ctx->Xi.c,tag,len);
/*
 * Finalizes the computation and copies the resulting tag out.
 *
 *   ctx - GCM state.
 *   tag - destination buffer.
 *   len - number of bytes requested; at most sizeof(ctx->Xi.c) are copied.
 */
1607 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
/* A NULL tag makes finish skip its comparison; its result is ignored. */
1609 CRYPTO_gcm128_finish(ctx, NULL, 0);
/* Clamp the copy so a large len cannot read past the end of Xi. */
1610 memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
/*
 * Allocates a GCM128_CONTEXT with OPENSSL_malloc and, if the allocation
 * succeeded, initializes it for the given key and block function.
 *
 *   key   - passed through to CRYPTO_gcm128_init.
 *   block - passed through to CRYPTO_gcm128_init.
 *
 * NOTE(review): presumably returns ret, which is NULL when the allocation
 * failed - confirm against the return statement.
 */
1613 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1615 GCM128_CONTEXT *ret;
/* Initialization is skipped when the allocation fails. */
1617 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1618 CRYPTO_gcm128_init(ret,key,block);
/*
 * Disposes of a GCM128_CONTEXT.
 * NOTE(review): presumably pairs with CRYPTO_gcm128_new, guarding against a
 * NULL ctx and freeing the memory after the wipe - confirm.
 */
1623 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
/* Wipe the whole context with OPENSSL_cleanse. */
1626 OPENSSL_cleanse(ctx,sizeof(*ctx));
1631 #if defined(SELFTEST)
1633 #include <openssl/aes.h>
/*
 * Known-answer vectors for self-test cases 1 to 6.
 *
 * The numeric suffix is the case number. TEST_CASE(n) pastes it onto these
 * prefixes: K = AES key (keyed with sizeof(K##n)*8 bits), IV = initialization
 * vector, A = additional authenticated data, P = plaintext, C = expected
 * ciphertext, T = expected 16-byte tag.
 *
 * NOTE(review): names used by TEST_CASE but not declared here (for example
 * K2, IV2, A3) are presumably aliases or null pointers set up elsewhere -
 * confirm.
 */
/* Case 1: K1 has 16 elements, so it is used as a 128-bit key. */
1636 static const u8 K1[16],
1641 static const u8 T1[]= {
1642 0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,
1643 0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a
/* Case 2: P2 has no initializer, i.e. sixteen zero bytes of plaintext. */
1650 static const u8 P2[16];
1651 static const u8 C2[]= {
1652 0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,
1653 0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78
1655 static const u8 T2[]= {
1656 0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,
1657 0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf
/* Case 3: sixteen key bytes listed, 12-byte IV. */
1662 static const u8 K3[]= {
1663 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,
1664 0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08
1666 static const u8 P3[]= {
1667 0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,
1668 0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1669 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,
1670 0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1671 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,
1672 0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1673 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,
1674 0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55
1676 static const u8 IV3[]= {
1677 0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,
1678 0xde,0xca,0xf8,0x88};
1679 static const u8 C3[]= {
1680 0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,
1681 0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1682 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,
1683 0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1684 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,
1685 0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1686 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,
1687 0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85
1689 static const u8 T3[]= {
1690 0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,
1691 0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4
/* Case 4: 60-byte plaintext (P3 shortened by four bytes) plus 20 bytes of AAD. */
1697 static const u8 P4[]= {
1698 0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,
1699 0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1700 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,
1701 0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1702 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,
1703 0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1704 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,
1705 0xba,0x63,0x7b,0x39};
1706 static const u8 A4[]= {
1707 0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1708 0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1709 0xab,0xad,0xda,0xd2};
1710 static const u8 C4[]= {
1711 0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,
1712 0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1713 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,
1714 0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1715 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,
1716 0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1717 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,
1720 static const u8 T4[]= {
1721 0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,
1722 0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47
/* Case 5: 8-byte IV. */
1729 static const u8 IV5[]= {
1730 0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad
1732 static const u8 C5[]= {
1733 0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,
1734 0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1735 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,
1736 0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1737 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,
1738 0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1739 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,
1740 0xc2,0x3f,0x45,0x98};
1741 static const u8 T5[]= {
1742 0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,
1743 0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb
/* Case 6: IV several blocks long. */
1750 static const u8 IV6[]= {
1751 0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,
1752 0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1753 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,
1754 0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1755 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,
1756 0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1757 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,
1760 static const u8 C6[]= {
1761 0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,
1762 0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1763 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,
1764 0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1765 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,
1766 0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1767 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,
1770 static const u8 T6[]= {
1771 0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,
1772 0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50
/*
 * Known-answer vectors for self-test cases 7 to 12.
 *
 * The numeric suffix is the case number. TEST_CASE(n) pastes it onto these
 * prefixes: K = AES key (keyed with sizeof(K##n)*8 bits), IV = initialization
 * vector, A = additional authenticated data, P = plaintext, C = expected
 * ciphertext, T = expected 16-byte tag.
 *
 * NOTE(review): names used by TEST_CASE but not declared here (for example
 * K8, IV8, A9) are presumably aliases or null pointers set up elsewhere -
 * confirm.
 */
/* Case 7: K7 has 24 elements, so it is used as a 192-bit key. */
1776 static const u8 K7[24],
1781 static const u8 T7[]= {
1782 0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,
1783 0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35
/* Case 8: P8 has no initializer, i.e. sixteen zero bytes of plaintext. */
1790 static const u8 P8[16];
1791 static const u8 C8[]= {
1792 0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,
1793 0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00
1795 static const u8 T8[]= {
1796 0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,
1797 0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb
/* Case 9: twenty-four key bytes listed. */
1802 static const u8 K9[]= {
1803 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,
1804 0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1805 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c
1807 static const u8 P9[]= {
1808 0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,
1809 0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1810 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,
1811 0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1812 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,
1813 0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1814 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,
1815 0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55
1817 static const u8 IV9[]= {
1818 0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,
1821 static const u8 C9[]= {
1822 0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,
1823 0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1824 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,
1825 0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1826 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,
1827 0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1828 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,
1829 0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56
1831 static const u8 T9[]= {
1832 0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,
1833 0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14
/* Case 10: adds AAD (A10). */
1839 static const u8 P10[]= {
1840 0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,
1841 0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1842 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,
1843 0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1844 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,
1845 0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1846 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,
1849 static const u8 A10[]= {
1850 0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1851 0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1854 static const u8 C10[]= {
1855 0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,
1856 0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1857 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,
1858 0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1859 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,
1860 0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1861 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,
1864 static const u8 T10[]= {
1865 0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,
1866 0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c
/* Case 11: 8-byte IV. */
1873 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad};
1874 static const u8 C11[]= {
1875 0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,
1876 0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1877 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,
1878 0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1879 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,
1880 0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1881 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,
1882 0xa0,0xf0,0x62,0xf7};
1883 static const u8 T11[]= {
1884 0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,
1885 0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8
/* Case 12: IV several blocks long. */
1892 static const u8 IV12[]={
1893 0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,
1894 0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1895 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,
1896 0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1897 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,
1898 0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1899 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,
1902 static const u8 C12[]= {
1903 0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,
1904 0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1905 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,
1906 0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1907 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,
1908 0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1909 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,
1912 static const u8 T12[]= {
1913 0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,
1914 0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9
/*
 * Known-answer vectors for self-test cases 13 to 18.
 *
 * The numeric suffix is the case number. TEST_CASE(n) pastes it onto these
 * prefixes: K = AES key (keyed with sizeof(K##n)*8 bits), IV = initialization
 * vector, A = additional authenticated data, P = plaintext, C = expected
 * ciphertext, T = expected 16-byte tag.
 *
 * NOTE(review): names used by TEST_CASE but not declared here (for example
 * K14, IV14, A15) are presumably aliases or null pointers set up elsewhere -
 * confirm.
 */
/* Case 13: K13 has 32 elements, so it is used as a 256-bit key. */
1918 static const u8 K13[32],
1923 static const u8 T13[]= {
1924 0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,
1925 0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b
/* Case 14: P14 has 16 elements and no visible initializer. */
1931 static const u8 P14[16],
1933 static const u8 C14[]= {
1934 0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,
1935 0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18
1937 static const u8 T14[]= {
1938 0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,
1939 0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19
/* Case 15: thirty-two key bytes listed. */
1944 static const u8 K15[]= {
1945 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,
1946 0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1947 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,
1948 0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08
1950 static const u8 P15[]= {
1951 0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,
1952 0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1953 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,
1954 0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1955 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,
1956 0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1957 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,
1958 0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55
1960 static const u8 IV15[]={
1961 0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,
1964 static const u8 C15[]= {
1965 0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,
1966 0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1967 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,
1968 0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1969 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,
1970 0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1971 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,
1972 0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad
1974 static const u8 T15[]= {
1975 0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,
1976 0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c
/* Case 16: adds AAD (A16). */
1982 static const u8 P16[]= {
1983 0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,
1984 0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1985 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,
1986 0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1987 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,
1988 0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1989 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,
1992 static const u8 A16[]= {
1993 0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1994 0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1997 static const u8 C16[]= {
1998 0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,
1999 0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
2000 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,
2001 0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
2002 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,
2003 0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
2004 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,
2007 static const u8 T16[]= {
2008 0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,
2009 0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b
/* Case 17: 8-byte IV. */
2016 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad};
2017 static const u8 C17[]= {
2018 0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,
2019 0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
2020 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,
2021 0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
2022 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,
2023 0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
2024 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,
2027 static const u8 T17[]= {
2028 0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,
2029 0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2
/* Case 18: IV several blocks long. */
2036 static const u8 IV18[]={
2037 0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,
2038 0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
2039 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,
2040 0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
2041 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,
2042 0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
2043 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,
2046 static const u8 C18[]= {
2047 0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,
2048 0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
2049 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,
2050 0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
2051 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,
2052 0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
2053 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,
2056 static const u8 T18[]= {
2057 0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,
2058 0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a
/*
 * Known-answer vectors for self-test cases 19 and 20.
 *
 * The numeric suffix is the case number. TEST_CASE(n) pastes it onto these
 * prefixes: K = AES key, IV = initialization vector, A = additional
 * authenticated data, P = plaintext, C = expected ciphertext, T = expected
 * 16-byte tag.
 *
 * NOTE(review): names used by TEST_CASE but not declared here (for example
 * K19, P19, K20) are presumably aliases or null pointers set up elsewhere -
 * confirm.
 */
/*
 * Case 19: 128 bytes of AAD. The first 64 bytes are identical to P15 and the
 * last 64 to C15.
 */
2066 static const u8 A19[]= {
2067 0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,
2068 0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
2069 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,
2070 0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
2071 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,
2072 0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
2073 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,
2074 0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
2075 0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,
2076 0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
2077 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,
2078 0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
2079 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,
2080 0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
2081 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,
2082 0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad
2084 static const u8 T19[]= {
2085 0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,
2086 0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92
/*
 * Case 20: 64-byte IV whose first four bytes are 0xff and the rest zero,
 * with 288 zero bytes of plaintext; C20 lists the matching 288 bytes of
 * expected ciphertext.
 */
2092 static const u8 IV20[64]={0xff,0xff,0xff,0xff}; /* this results in 0xff in counter LSB */
2093 static const u8 P20[288];
2094 static const u8 C20[]= {
2095 0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,
2096 0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
2097 0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,
2098 0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
2099 0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,
2100 0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
2101 0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,
2102 0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
2103 0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,
2104 0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
2105 0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,
2106 0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
2107 0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,
2108 0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
2109 0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,
2110 0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
2111 0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,
2112 0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
2113 0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,
2114 0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
2115 0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,
2116 0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
2117 0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,
2118 0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
2119 0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,
2120 0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
2121 0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,
2122 0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
2123 0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,
2124 0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
2125 0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,
2126 0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
2127 0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,
2128 0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
2129 0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,
2130 0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c
2132 static const u8 T20[]= {
2133 0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,
2134 0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f
/*
 * TEST_CASE(n): runs known-answer test n in both directions, using the
 * vectors K##n, IV##n, A##n, P##n, C##n and T##n together with the variables
 * ctx, key and ret taken from the scope where the macro is expanded.
 *
 * Setup:   keys AES with sizeof(K##n)*8 bits and initializes the GCM context
 *          with AES_encrypt as the block function.
 * Encrypt: sets the IV, feeds A##n as AAD and P##n as plaintext (each only
 *          when non-null), then records a failure if CRYPTO_gcm128_finish
 *          returns non-zero for the 16-byte tag T##n or the output differs
 *          from C##n.
 * Decrypt: sets the IV again, feeds A##n and C##n, and records a failure if
 *          the tag check fails or the output differs from P##n.
 *
 * A failure increments ret and prints a message naming the direction and the
 * test number. The scratch buffer out is sized by sizeof(P##n) and zeroed
 * before each direction.
 */
2137 #define TEST_CASE(n) do { \
2138 u8 out[sizeof(P##n)]; \
2139 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
2140 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
2141 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
2142 memset(out,0,sizeof(out)); \
2143 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
2144 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
2145 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
2146 (C##n && memcmp(out,C##n,sizeof(out)))) \
2147 ret++, printf ("encrypt test#%d failed.\n",n); \
2148 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
2149 memset(out,0,sizeof(out)); \
2150 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
2151 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
2152 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
2153 (P##n && memcmp(out,P##n,sizeof(out)))) \
2154 ret++, printf ("decrypt test#%d failed.\n",n); \
2184 #ifdef OPENSSL_CPUID_OBJ
2186 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
2187 union { u64 u; u8 c[1024]; } buf;
2190 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
2191 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
2192 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
2194 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
2195 start = OPENSSL_rdtsc();
2196 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
2197 gcm_t = OPENSSL_rdtsc() - start;
2199 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
2200 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
2201 (block128_f)AES_encrypt);
2202 start = OPENSSL_rdtsc();
2203 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
2204 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
2205 (block128_f)AES_encrypt);
2206 ctr_t = OPENSSL_rdtsc() - start;
2208 printf("%.2f-%.2f=%.2f\n",
2209 gcm_t/(double)sizeof(buf),
2210 ctr_t/(double)sizeof(buf),
2211 (gcm_t-ctr_t)/(double)sizeof(buf));
2214 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
2215 const u8 *inp,size_t len) = ctx.ghash;
2217 GHASH((&ctx),buf.c,sizeof(buf));
2218 start = OPENSSL_rdtsc();
2219 for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
2220 gcm_t = OPENSSL_rdtsc() - start;
2221 printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);