1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
50 #include <openssl/crypto.h>
51 #include "modes_lcl.h"
/*
 * NOTE(review): this file is a fragmented excerpt (the embedded line
 * numbers jump, e.g. 74 -> 77), so definitions below are missing interior
 * lines.  Comments describe only what the visible lines show.
 */
61 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
62 /* redefine, because alignment is ensured */
64 # define GETU32(p) BSWAP4(*(const u32 *)(p))
66 # define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
/* PACK: place a 16-bit constant into the top 16 bits of a size_t. */
69 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * REDUCE1BIT: shift the 128-bit value V right by one bit and, when the
 * bit shifted out was set, xor in the GHASH reduction constant
 * (0xe1 << 120).  A 64-bit and a 32-bit size_t variant are selected by
 * sizeof(size_t); the "} else {" and closing lines of this macro are not
 * visible in this excerpt.
 */
70 #define REDUCE1BIT(V) do { \
71 if (sizeof(size_t)==8) { \
72 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
73 V.lo = (V.hi<<63)|(V.lo>>1); \
74 V.hi = (V.hi>>1 )^T; \
77 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
78 V.lo = (V.hi<<63)|(V.lo>>1); \
79 V.hi = (V.hi>>1 )^((u64)T<<32); \
84 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
85 * never be set to 8. 8 is effectively reserved for testing purposes.
86 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
87 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
88 * whole spectrum of possible table driven implementations. Why? In
89 * non-"Shoup's" case memory access pattern is segmented in such manner,
90 * that it's trivial to see that cache timing information can reveal
91 * fair portion of intermediate hash value. Given that ciphertext is
92 * always available to attacker, it's possible for him to attempt to
93 * deduce secret parameter H and if successful, tamper with messages
94 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
95 * not as trivial, but there is no reason to believe that it's resistant
96 * to cache-timing attack. And the thing about "8-bit" implementation is
97 * that it consumes 16 (sixteen) times more memory, 4KB per individual
98 * key + 1KB shared. Well, on pros side it should be twice as fast as
99 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
100 * was observed to run ~75% faster, closer to 100% for commercial
101 * compilers... Yet "4-bit" procedure is preferred, because it's
102 * believed to provide better security-performance balance and adequate
103 * all-round performance. "All-round" refers to things like:
105 * - shorter setup time effectively improves overall timing for
106 * handling short messages;
107 * - larger table allocation can become unbearable because of VM
108 * subsystem penalties (for example on Windows large enough free
109 * results in VM working set trimming, meaning that consequent
110 * malloc would immediately incur working set expansion);
111 * - larger table has larger cache footprint, which can affect
112 * performance of other code paths (not necessarily even from same
113 * thread in Hyper-Threading world);
115 * Value of 1 is not appropriate for performance reasons.
/*
 * gcm_init_8bit: build the 256-entry ("8-bit Shoup") multiplication
 * table for hash key H.  Visible code: powers-of-two entries are filled
 * by the halving loop, then composite indices are filled by xor-combining
 * existing entries (GF(2^128) addition is xor, so the table is linear in
 * the index bits).  Local declarations and parts of the first loop body
 * are missing from this excerpt.
 */
119 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
129 for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
134 for (i = 2; i < 256; i <<= 1) {
135 u128 *Hi = Htable + i, H0 = *Hi;
136 for (j = 1; j < i; ++j) {
/* Htable[i + j] = Htable[i] ^ Htable[j] */
137 Hi[j].hi = H0.hi ^ Htable[j].hi;
138 Hi[j].lo = H0.lo ^ Htable[j].lo;
/*
 * gcm_gmult_8bit: Xi = Xi * H in GF(2^128), using the 256-entry table
 * produced by gcm_init_8bit.  Processes Xi one byte at a time from the
 * last byte backwards (xi starts at byte 15).  The rem_8bit table holds
 * PACK()-ed pre-reduced values for the 8 bits shifted out per step; the
 * loop body between the table and the final store is largely missing
 * from this excerpt.
 */
143 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
146 const u8 *xi = (const u8 *)Xi + 15;
/* Reduction table: rem_8bit[b] = b * P(x) pre-shifted via PACK(). */
152 static const size_t rem_8bit[256] = {
153 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
154 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
155 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
156 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
157 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
158 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
159 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
160 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
161 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
162 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
163 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
164 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
165 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
166 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
167 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
168 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
169 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
170 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
171 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
172 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
173 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
174 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
175 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
176 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
177 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
178 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
179 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
180 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
181 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
182 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
183 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
184 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
185 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
186 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
187 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
188 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
189 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
190 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
191 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
192 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
193 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
194 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
195 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
196 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
197 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
198 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
199 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
200 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
201 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
202 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
203 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
204 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
205 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
206 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
207 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
208 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
209 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
210 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
211 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
212 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
213 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
214 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
215 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
216 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
/* Accumulate the table entry selected by the current byte n. */
220 Z.hi ^= Htable[n].hi;
221 Z.lo ^= Htable[n].lo;
/* Shift Z right 8 bits; the byte shifted out is reduced via rem_8bit. */
228 rem = (size_t)Z.lo & 0xff;
229 Z.lo = (Z.hi << 56) | (Z.lo >> 8);
231 if (sizeof(size_t) == 8)
232 Z.hi ^= rem_8bit[rem];
/* 32-bit size_t: PACK() left only 16 useful bits, realign into Z.hi. */
234 Z.hi ^= (u64)rem_8bit[rem] << 32;
/* Store Z back into Xi in big-endian order (BSWAP8 path shown). */
237 if (is_endian.little) {
239 Xi[0] = BSWAP8(Z.hi);
240 Xi[1] = BSWAP8(Z.lo);
244 v = (u32)(Z.hi >> 32);
248 v = (u32)(Z.lo >> 32);
259 # define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
/*
 * gcm_init_4bit: build the 16-entry ("4-bit Shoup") table for hash key H.
 * Two visible strategies: a loop-driven fill under
 * OPENSSL_SMALL_FOOTPRINT, and an unrolled fill otherwise; both exploit
 * linearity (Htable[i^j] relations).  The trailing block reorders dwords
 * for the ARM assembler.  Several declarations, REDUCE1BIT steps and
 * #else/#endif lines are missing from this excerpt.
 */
263 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
266 # if defined(OPENSSL_SMALL_FOOTPRINT)
275 # if defined(OPENSSL_SMALL_FOOTPRINT)
276 for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
281 for (i = 2; i < 16; i <<= 1) {
282 u128 *Hi = Htable + i;
284 for (V = *Hi, j = 1; j < i; ++j) {
285 Hi[j].hi = V.hi ^ Htable[j].hi;
286 Hi[j].lo = V.lo ^ Htable[j].lo;
/* Unrolled variant: each composite entry is the xor of two others. */
297 Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
299 Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
300 Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
301 Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
303 Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
304 Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
305 Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
306 Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
307 Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
308 Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
309 Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
311 # if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
313 * ARM assembler expects specific dword order in Htable.
322 if (is_endian.little)
323 for (j = 0; j < 16; ++j) {
328 for (j = 0; j < 16; ++j) {
/* Big-endian path: swap the 32-bit halves of each dword. */
330 Htable[j].hi = V.lo << 32 | V.lo >> 32;
331 Htable[j].lo = V.hi << 32 | V.hi >> 32;
/*
 * rem_4bit[n]: pre-reduced value of the 4 bits shifted out of Z per
 * 4-bit step, PACK()-ed into the top 16 bits of a size_t.  Shared by
 * gcm_gmult_4bit/gcm_ghash_4bit; the closing "};" is not visible in
 * this excerpt.
 */
338 static const size_t rem_4bit[16] = {
339 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
340 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
341 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
342 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
/*
 * gcm_gmult_4bit: Xi = Xi * H using the 16-entry table.  Each input byte
 * contributes two table lookups (low nibble nlo, high nibble nhi), with
 * a 4-bit shift/reduce between them via rem_4bit.  Loop framing and the
 * big-endian store path are partially missing from this excerpt.
 */
345 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
349 size_t rem, nlo, nhi;
/* Start from the last byte of Xi. */
355 nlo = ((const u8 *)Xi)[15];
359 Z.hi = Htable[nlo].hi;
360 Z.lo = Htable[nlo].lo;
/* Shift Z right 4 bits; reduce the nibble shifted out. */
363 rem = (size_t)Z.lo & 0xf;
364 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
366 if (sizeof(size_t) == 8)
367 Z.hi ^= rem_4bit[rem];
369 Z.hi ^= (u64)rem_4bit[rem] << 32;
371 Z.hi ^= Htable[nhi].hi;
372 Z.lo ^= Htable[nhi].lo;
377 nlo = ((const u8 *)Xi)[cnt];
381 rem = (size_t)Z.lo & 0xf;
382 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
384 if (sizeof(size_t) == 8)
385 Z.hi ^= rem_4bit[rem];
387 Z.hi ^= (u64)rem_4bit[rem] << 32;
389 Z.hi ^= Htable[nlo].hi;
390 Z.lo ^= Htable[nlo].lo;
/* Write Z back to Xi in big-endian byte order. */
393 if (is_endian.little) {
395 Xi[0] = BSWAP8(Z.hi);
396 Xi[1] = BSWAP8(Z.lo);
400 v = (u32)(Z.hi >> 32);
404 v = (u32)(Z.lo >> 32);
415 # if !defined(OPENSSL_SMALL_FOOTPRINT)
417 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
418 * details... Compiler-generated code doesn't seem to give any
419 * performance improvement, at least not on x86[_64]. It's here
420 * mostly as reference and a placeholder for possible future
421 * non-trivial optimization[s]...
/*
 * gcm_ghash_4bit: streamed GHASH — absorb len bytes from inp into Xi,
 * 16 bytes per iteration.  The variant below pre-computes shifted
 * copies of Htable (Hshr4/Hshl4) plus an unsigned-short rem_8bit table
 * so the inner loop can consume 8 bits per step.  Large parts of the
 * loop framing are missing from this excerpt.
 */
423 static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
424 const u8 *inp, size_t len)
428 size_t rem, nlo, nhi;
437 nlo = ((const u8 *)Xi)[15];
442 Z.hi = Htable[nlo].hi;
443 Z.lo = Htable[nlo].lo;
446 rem = (size_t)Z.lo & 0xf;
447 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
449 if (sizeof(size_t) == 8)
450 Z.hi ^= rem_4bit[rem];
452 Z.hi ^= (u64)rem_4bit[rem] << 32;
454 Z.hi ^= Htable[nhi].hi;
455 Z.lo ^= Htable[nhi].lo;
460 nlo = ((const u8 *)Xi)[cnt];
465 rem = (size_t)Z.lo & 0xf;
466 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
468 if (sizeof(size_t) == 8)
469 Z.hi ^= rem_4bit[rem];
471 Z.hi ^= (u64)rem_4bit[rem] << 32;
473 Z.hi ^= Htable[nlo].hi;
474 Z.lo ^= Htable[nlo].lo;
478 * Extra 256+16 bytes per-key plus 512 bytes shared tables
479 * [should] give ~50% improvement... One could have PACK()-ed
480 * the rem_8bit even here, but the priority is to minimize
483 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
484 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
485 static const unsigned short rem_8bit[256] = {
486 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
487 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
488 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
489 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
490 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
491 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
492 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
493 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
494 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
495 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
496 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
497 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
498 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
499 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
500 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
501 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
502 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
503 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
504 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
505 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
506 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
507 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
508 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
509 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
510 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
511 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
512 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
513 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
514 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
515 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
516 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
517 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
520 * This pre-processing phase slows down procedure by approximately
521 * same time as it makes each loop spin faster. In other words
522 * single block performance is approximately same as straightforward
523 * "4-bit" implementation, and then it goes only faster...
/* Pre-compute shifted copies of Htable for the 8-bit-per-step loop. */
525 for (cnt = 0; cnt < 16; ++cnt) {
526 Z.hi = Htable[cnt].hi;
527 Z.lo = Htable[cnt].lo;
528 Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
529 Hshr4[cnt].hi = (Z.hi >> 4);
530 Hshl4[cnt] = (u8)(Z.lo << 4);
534 for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
535 nlo = ((const u8 *)Xi)[cnt];
540 Z.hi ^= Htable[nlo].hi;
541 Z.lo ^= Htable[nlo].lo;
543 rem = (size_t)Z.lo & 0xff;
545 Z.lo = (Z.hi << 56) | (Z.lo >> 8);
548 Z.hi ^= Hshr4[nhi].hi;
549 Z.lo ^= Hshr4[nhi].lo;
550 Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
/* Final (byte 0) step uses a 4-bit shift rather than 8. */
553 nlo = ((const u8 *)Xi)[0];
558 Z.hi ^= Htable[nlo].hi;
559 Z.lo ^= Htable[nlo].lo;
561 rem = (size_t)Z.lo & 0xf;
563 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
566 Z.hi ^= Htable[nhi].hi;
567 Z.lo ^= Htable[nhi].lo;
568 Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
/* Store Z back into Xi big-endian, then advance to the next block. */
571 if (is_endian.little) {
573 Xi[0] = BSWAP8(Z.hi);
574 Xi[1] = BSWAP8(Z.lo);
578 v = (u32)(Z.hi >> 32);
582 v = (u32)(Z.lo >> 32);
591 } while (inp += 16, len -= 16);
/*
 * Assembler prototypes for the 4-bit routines, plus the GCM_MUL/GHASH
 * dispatch macros used by the bulk encrypt/decrypt paths below.
 */
595 void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
596 void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
600 # define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
601 # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
602 # define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
604 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing
605 * effect. In other words idea is to hash data while it's still in L1 cache
606 * after encryption pass...
608 # define GHASH_CHUNK (3*1024)
611 #else /* TABLE_BITS */
/*
 * gcm_gmult_1bit: table-free bit-serial multiplication Xi = Xi * H.
 * Loads Xi one long at a time (byte-swapped to host order on
 * little-endian), then processes it bit by bit; M is an all-ones/zero
 * mask derived from the current top bit of X.  The mask application and
 * REDUCE1BIT lines are missing from this excerpt.
 */
613 static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
615 u128 V, Z = { 0, 0 };
618 const long *xi = (const long *)Xi;
624 V.hi = H[0]; /* H is in host byte order, no byte swapping */
627 for (j = 0; j < 16 / sizeof(long); ++j) {
628 if (is_endian.little) {
629 if (sizeof(long) == 8) {
631 X = (long)(BSWAP8(xi[j]));
633 const u8 *p = (const u8 *)(xi + j);
634 X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
637 const u8 *p = (const u8 *)(xi + j);
643 for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
/* M = all-ones when the top bit of X is set (sign-extended shift). */
644 u64 M = (u64)(X >> (8 * sizeof(long) - 1));
652 if (is_endian.little) {
654 Xi[0] = BSWAP8(Z.hi);
655 Xi[1] = BSWAP8(Z.lo);
659 v = (u32)(Z.hi >> 32);
663 v = (u32)(Z.lo >> 32);
674 # define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/*
 * Per-architecture assembler dispatch: declares the hand-written
 * gmult/ghash routines (CLMUL/AVX on x86[_64], NEON/PMULL on ARM, VIS3
 * on SPARC, vcipher-era "p8" on POWER) and defines GCM_FUNCREF_4BIT so
 * the C paths below call through ctx->gmult/ctx->ghash function
 * pointers.  Several #else/#endif lines are missing from this excerpt.
 */
678 #if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
679 # if !defined(I386_ONLY) && \
680 (defined(__i386) || defined(__i386__) || \
681 defined(__x86_64) || defined(__x86_64__) || \
682 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
683 # define GHASH_ASM_X86_OR_64
684 # define GCM_FUNCREF_4BIT
685 extern unsigned int OPENSSL_ia32cap_P[2];
687 void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
688 void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
689 void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
/* 32-bit x86: the AVX entry points alias the plain CLMUL ones. */
692 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
693 # define gcm_init_avx gcm_init_clmul
694 # define gcm_gmult_avx gcm_gmult_clmul
695 # define gcm_ghash_avx gcm_ghash_clmul
697 void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
698 void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
699 void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
703 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
704 # define GHASH_ASM_X86
705 void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
706 void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
709 void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
710 void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
713 # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
714 # include "arm_arch.h"
715 # if __ARM_MAX_ARCH__>=7
716 # define GHASH_ASM_ARM
717 # define GCM_FUNCREF_4BIT
718 # define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
719 # if defined(__arm__) || defined(__arm)
720 # define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
722 void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
723 void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
724 void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
726 void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
727 void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
728 void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
731 # elif defined(__sparc__) || defined(__sparc)
732 # include "sparc_arch.h"
733 # define GHASH_ASM_SPARC
734 # define GCM_FUNCREF_4BIT
735 extern unsigned int OPENSSL_sparcv9cap_P[];
736 void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
737 void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
738 void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
740 # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
741 # include "ppc_arch.h"
742 # define GHASH_ASM_PPC
743 # define GCM_FUNCREF_4BIT
744 void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
745 void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
746 void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
/* Function-pointer dispatch versions of the GCM_MUL/GHASH macros. */
751 #ifdef GCM_FUNCREF_4BIT
753 # define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
756 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
/*
 * CRYPTO_gcm128_init: zero the context, derive the hash key
 * H = E_K(0^128), convert it to host byte order, then pick the fastest
 * available gmult/ghash implementation for this CPU and precompute its
 * Htable.  Many #if/#else/#endif framing lines are missing from this
 * excerpt, so the branch structure shown is incomplete.
 */
760 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
767 memset(ctx, 0, sizeof(*ctx));
/* H = E_K(0^128): encrypt the all-zero block with the supplied cipher. */
771 (*block) (ctx->H.c, ctx->H.c, key);
773 if (is_endian.little) {
774 /* H is stored in host byte order */
776 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
777 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
781 hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
782 lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
788 gcm_init_8bit(ctx->Htable, ctx->H.u);
/* CTX__GHASH only sets ctx->ghash when a streamed GHASH exists. */
791 # define CTX__GHASH(f) (ctx->ghash = (f))
793 # define CTX__GHASH(f) (ctx->ghash = NULL)
795 # if defined(GHASH_ASM_X86_OR_64)
796 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
797 if (OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
798 OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
799 if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
800 gcm_init_avx(ctx->Htable, ctx->H.u);
801 ctx->gmult = gcm_gmult_avx;
802 CTX__GHASH(gcm_ghash_avx);
804 gcm_init_clmul(ctx->Htable, ctx->H.u);
805 ctx->gmult = gcm_gmult_clmul;
806 CTX__GHASH(gcm_ghash_clmul);
811 gcm_init_4bit(ctx->Htable, ctx->H.u);
812 # if defined(GHASH_ASM_X86) /* x86 only */
813 # if defined(OPENSSL_IA32_SSE2)
814 if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
816 if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
818 ctx->gmult = gcm_gmult_4bit_mmx;
819 CTX__GHASH(gcm_ghash_4bit_mmx);
821 ctx->gmult = gcm_gmult_4bit_x86;
822 CTX__GHASH(gcm_ghash_4bit_x86);
825 ctx->gmult = gcm_gmult_4bit;
826 CTX__GHASH(gcm_ghash_4bit);
828 # elif defined(GHASH_ASM_ARM)
829 # ifdef PMULL_CAPABLE
831 gcm_init_v8(ctx->Htable, ctx->H.u);
832 ctx->gmult = gcm_gmult_v8;
833 CTX__GHASH(gcm_ghash_v8);
838 gcm_init_neon(ctx->Htable, ctx->H.u);
839 ctx->gmult = gcm_gmult_neon;
840 CTX__GHASH(gcm_ghash_neon);
844 gcm_init_4bit(ctx->Htable, ctx->H.u);
845 ctx->gmult = gcm_gmult_4bit;
846 CTX__GHASH(gcm_ghash_4bit);
848 # elif defined(GHASH_ASM_SPARC)
849 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
850 gcm_init_vis3(ctx->Htable, ctx->H.u);
851 ctx->gmult = gcm_gmult_vis3;
852 CTX__GHASH(gcm_ghash_vis3);
854 gcm_init_4bit(ctx->Htable, ctx->H.u);
855 ctx->gmult = gcm_gmult_4bit;
856 CTX__GHASH(gcm_ghash_4bit);
858 # elif defined(GHASH_ASM_PPC)
859 if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
860 gcm_init_p8(ctx->Htable, ctx->H.u);
861 ctx->gmult = gcm_gmult_p8;
862 CTX__GHASH(gcm_ghash_p8);
864 gcm_init_4bit(ctx->Htable, ctx->H.u);
865 ctx->gmult = gcm_gmult_4bit;
866 CTX__GHASH(gcm_ghash_4bit);
869 gcm_init_4bit(ctx->Htable, ctx->H.u);
/*
 * CRYPTO_gcm128_setiv: reset per-message state and derive the initial
 * counter block Yi from the IV.  Visible paths: a 96-bit IV is copied
 * directly; otherwise the IV is GHASHed in 16-byte pieces (plus its
 * bit length, len0) into Yi.  EK0 = E_K(Yi) is computed for the tag.
 * Loop/branch framing lines are missing from this excerpt.
 */
875 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
883 #ifdef GCM_FUNCREF_4BIT
884 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
891 ctx->len.u[0] = 0; /* AAD length */
892 ctx->len.u[1] = 0; /* message length */
/* 96-bit IV fast path: Yi = IV || 0^31 || 1 (counter set elsewhere). */
897 memcpy(ctx->Yi.c, iv, 12);
905 for (i = 0; i < 16; ++i)
906 ctx->Yi.c[i] ^= iv[i];
912 for (i = 0; i < len; ++i)
913 ctx->Yi.c[i] ^= iv[i];
/* Fold the IV bit-length (len0) into the final GHASH block. */
917 if (is_endian.little) {
919 ctx->Yi.u[1] ^= BSWAP8(len0);
921 ctx->Yi.c[8] ^= (u8)(len0 >> 56);
922 ctx->Yi.c[9] ^= (u8)(len0 >> 48);
923 ctx->Yi.c[10] ^= (u8)(len0 >> 40);
924 ctx->Yi.c[11] ^= (u8)(len0 >> 32);
925 ctx->Yi.c[12] ^= (u8)(len0 >> 24);
926 ctx->Yi.c[13] ^= (u8)(len0 >> 16);
927 ctx->Yi.c[14] ^= (u8)(len0 >> 8);
928 ctx->Yi.c[15] ^= (u8)(len0);
931 ctx->Yi.u[1] ^= len0;
935 if (is_endian.little)
937 ctr = BSWAP4(ctx->Yi.d[3]);
939 ctr = GETU32(ctx->Yi.c + 12);
/* EK0 = E_K(Y0), kept for the final authentication tag. */
945 (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
947 if (is_endian.little)
949 ctx->Yi.d[3] = BSWAP4(ctr);
951 PUTU32(ctx->Yi.c + 12, ctr);
/*
 * CRYPTO_gcm128_aad: absorb additional authenticated data into Xi.
 * Enforces the GCM AAD length limit (2^61 bytes visible here), handles
 * a partial block left from a previous call, bulk-hashes whole 16-byte
 * blocks, then buffers the tail.  Returns int (0 on success per the
 * visible length check); error paths are missing from this excerpt.
 */
957 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
962 u64 alen = ctx->len.u[0];
963 #ifdef GCM_FUNCREF_4BIT
964 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
966 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
967 const u8 *inp, size_t len) = ctx->ghash;
/* Reject AAD longer than the GCM limit (or u64 overflow on 32-bit). */
975 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
977 ctx->len.u[0] = alen;
/* Continue a partial block from an earlier call. */
982 ctx->Xi.c[n] ^= *(aad++);
/* i = number of whole 16-byte blocks' worth of bytes. */
994 if ((i = (len & (size_t)-16))) {
1001 for (i = 0; i < 16; ++i)
1002 ctx->Xi.c[i] ^= aad[i];
/* Buffer the trailing partial block in Xi without multiplying yet. */
1009 n = (unsigned int)len;
1010 for (i = 0; i < len; ++i)
1011 ctx->Xi.c[i] ^= aad[i];
/*
 * CRYPTO_gcm128_encrypt: CTR-encrypt in[0..len) into out and fold the
 * ciphertext into the GHASH state Xi.  Visible structure: resume a
 * partial keystream block; then (when alignment permits) process
 * GHASH_CHUNK-sized runs, then remaining whole blocks, then the tail —
 * hashing ciphertext after it is produced.  Enforces the GCM plaintext
 * limit of 2^36 - 32 bytes.  Loop framing and the STRICT_ALIGNMENT
 * fallback are partially missing from this excerpt.
 */
1018 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
1019 const unsigned char *in, unsigned char *out,
1025 } is_endian = { 1 };
1026 unsigned int n, ctr;
1028 u64 mlen = ctx->len.u[1];
1029 block128_f block = ctx->block;
1030 void *key = ctx->key;
1031 #ifdef GCM_FUNCREF_4BIT
1032 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1033 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1034 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1035 const u8 *inp, size_t len) = ctx->ghash;
1040 n = (unsigned int)mlen % 16; /* alternative to ctx->mres */
/* Reject messages beyond the GCM limit (2^36 - 32 bytes). */
1043 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1045 ctx->len.u[1] = mlen;
1048 /* First call to encrypt finalizes GHASH(AAD) */
1053 if (is_endian.little)
1055 ctr = BSWAP4(ctx->Yi.d[3]);
1057 ctr = GETU32(ctx->Yi.c + 12);
1063 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1064 if (16 % sizeof(size_t) == 0) { /* always true actually */
/* Drain a previously started keystream block. */
1068 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1079 # if defined(STRICT_ALIGNMENT)
1080 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1084 # if defined(GHASH_CHUNK)
/* Bulk path: encrypt a whole chunk, then GHASH it in one call. */
1085 while (len >= GHASH_CHUNK) {
1086 size_t j = GHASH_CHUNK;
1089 size_t *out_t = (size_t *)out;
1090 const size_t *in_t = (const size_t *)in;
1092 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1094 if (is_endian.little)
1096 ctx->Yi.d[3] = BSWAP4(ctr);
1098 PUTU32(ctx->Yi.c + 12, ctr);
1102 for (i = 0; i < 16 / sizeof(size_t); ++i)
1103 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1108 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
/* Remaining whole 16-byte blocks. */
1112 if ((i = (len & (size_t)-16))) {
1116 size_t *out_t = (size_t *)out;
1117 const size_t *in_t = (const size_t *)in;
1119 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1121 if (is_endian.little)
1123 ctx->Yi.d[3] = BSWAP4(ctr);
1125 PUTU32(ctx->Yi.c + 12, ctr);
1129 for (i = 0; i < 16 / sizeof(size_t); ++i)
1130 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1135 GHASH(ctx, out - j, j);
/* No streamed GHASH: hash each block into Xi as it is produced. */
1139 size_t *out_t = (size_t *)out;
1140 const size_t *in_t = (const size_t *)in;
1142 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1144 if (is_endian.little)
1146 ctx->Yi.d[3] = BSWAP4(ctr);
1148 PUTU32(ctx->Yi.c + 12, ctr);
1152 for (i = 0; i < 16 / sizeof(size_t); ++i)
1153 ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
/* Tail: start a fresh keystream block for the last partial block. */
1161 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1163 if (is_endian.little)
1165 ctx->Yi.d[3] = BSWAP4(ctr);
1167 PUTU32(ctx->Yi.c + 12, ctr);
1172 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
/* Byte-at-a-time fallback (small footprint / unaligned). */
1182 for (i = 0; i < len; ++i) {
1184 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1186 if (is_endian.little)
1188 ctx->Yi.d[3] = BSWAP4(ctr);
1190 PUTU32(ctx->Yi.c + 12, ctr);
1195 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt: mirror of CRYPTO_gcm128_encrypt, except the
 * CIPHERTEXT (the input) is folded into Xi — for bulk chunks it is
 * GHASHed before decryption; in byte paths the input byte c is saved
 * before being overwritten so in==out aliasing works.  Same 2^36 - 32
 * length limit.  Loop framing lines are missing from this excerpt.
 */
1205 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1206 const unsigned char *in, unsigned char *out,
1212 } is_endian = { 1 };
1213 unsigned int n, ctr;
1215 u64 mlen = ctx->len.u[1];
1216 block128_f block = ctx->block;
1217 void *key = ctx->key;
1218 #ifdef GCM_FUNCREF_4BIT
1219 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1220 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1221 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1222 const u8 *inp, size_t len) = ctx->ghash;
1227 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1229 ctx->len.u[1] = mlen;
1232 /* First call to decrypt finalizes GHASH(AAD) */
1237 if (is_endian.little)
1239 ctr = BSWAP4(ctx->Yi.d[3]);
1241 ctr = GETU32(ctx->Yi.c + 12);
1247 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1248 if (16 % sizeof(size_t) == 0) { /* always true actually */
/* Drain a previously started keystream block. */
1253 *(out++) = c ^ ctx->EKi.c[n];
1265 # if defined(STRICT_ALIGNMENT)
1266 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1270 # if defined(GHASH_CHUNK)
/* Bulk path: GHASH the ciphertext chunk first, then decrypt it. */
1271 while (len >= GHASH_CHUNK) {
1272 size_t j = GHASH_CHUNK;
1274 GHASH(ctx, in, GHASH_CHUNK);
1276 size_t *out_t = (size_t *)out;
1277 const size_t *in_t = (const size_t *)in;
1279 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1281 if (is_endian.little)
1283 ctx->Yi.d[3] = BSWAP4(ctr);
1285 PUTU32(ctx->Yi.c + 12, ctr);
1289 for (i = 0; i < 16 / sizeof(size_t); ++i)
1290 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1298 if ((i = (len & (size_t)-16))) {
1301 size_t *out_t = (size_t *)out;
1302 const size_t *in_t = (const size_t *)in;
1304 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1306 if (is_endian.little)
1308 ctx->Yi.d[3] = BSWAP4(ctr);
1310 PUTU32(ctx->Yi.c + 12, ctr);
1314 for (i = 0; i < 16 / sizeof(size_t); ++i)
1315 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
/* No streamed GHASH: save ciphertext word c before overwriting. */
1323 size_t *out_t = (size_t *)out;
1324 const size_t *in_t = (const size_t *)in;
1326 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1328 if (is_endian.little)
1330 ctx->Yi.d[3] = BSWAP4(ctr);
1332 PUTU32(ctx->Yi.c + 12, ctr);
1336 for (i = 0; i < 16 / sizeof(size_t); ++i) {
1338 out[i] = c ^ ctx->EKi.t[i];
/* Tail: start a fresh keystream block for the final partial block. */
1348 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1350 if (is_endian.little)
1352 ctx->Yi.d[3] = BSWAP4(ctr);
1354 PUTU32(ctx->Yi.c + 12, ctr);
1361 out[n] = c ^ ctx->EKi.c[n];
/* Byte-at-a-time fallback (small footprint / unaligned). */
1371 for (i = 0; i < len; ++i) {
1374 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1376 if (is_endian.little)
1378 ctx->Yi.d[3] = BSWAP4(ctr);
1380 PUTU32(ctx->Yi.c + 12, ctr);
1386 out[i] = c ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_encrypt_ctr32: like CRYPTO_gcm128_encrypt, but the CTR
 * keystream is produced by the caller-supplied 32-bit-counter `stream`
 * routine (typically an assembler AES-CTR), many blocks at a time; the
 * resulting ciphertext is then GHASHed.  Falls back to the plain
 * encrypt routine under OPENSSL_SMALL_FOOTPRINT.  Loop framing lines
 * are missing from this excerpt.
 */
1397 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1398 const unsigned char *in, unsigned char *out,
1399 size_t len, ctr128_f stream)
1401 #if defined(OPENSSL_SMALL_FOOTPRINT)
1402 return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1407 } is_endian = { 1 };
1408 unsigned int n, ctr;
1410 u64 mlen = ctx->len.u[1];
1411 void *key = ctx->key;
1412 # ifdef GCM_FUNCREF_4BIT
1413 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1415 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1416 const u8 *inp, size_t len) = ctx->ghash;
1421 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1423 ctx->len.u[1] = mlen;
1426 /* First call to encrypt finalizes GHASH(AAD) */
1431 if (is_endian.little)
1433 ctr = BSWAP4(ctx->Yi.d[3]);
1435 ctr = GETU32(ctx->Yi.c + 12);
/* Drain a previously started keystream block. */
1443 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1454 # if defined(GHASH) && defined(GHASH_CHUNK)
/* Bulk path: stream-encrypt a chunk, bump the counter, then GHASH. */
1455 while (len >= GHASH_CHUNK) {
1456 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1457 ctr += GHASH_CHUNK / 16;
1458 if (is_endian.little)
1460 ctx->Yi.d[3] = BSWAP4(ctr);
1462 PUTU32(ctx->Yi.c + 12, ctr);
1466 GHASH(ctx, out, GHASH_CHUNK);
/* Remaining whole blocks (j = block count given to `stream`). */
1472 if ((i = (len & (size_t)-16))) {
1475 (*stream) (in, out, j, key, ctx->Yi.c);
1476 ctr += (unsigned int)j;
1477 if (is_endian.little)
1479 ctx->Yi.d[3] = BSWAP4(ctr);
1481 PUTU32(ctx->Yi.c + 12, ctr);
/* Without streamed GHASH, fold each ciphertext block into Xi here. */
1492 for (i = 0; i < 16; ++i)
1493 ctx->Xi.c[i] ^= out[i];
/* Tail: single keystream block via the plain block cipher. */
1500 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1502 if (is_endian.little)
1504 ctx->Yi.d[3] = BSWAP4(ctr);
1506 PUTU32(ctx->Yi.c + 12, ctr);
1511 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt_ctr32 - GCM decryption using a caller-supplied
 * ctr128_f `stream` bulk-CTR routine.  Mirrors the encrypt_ctr32 path,
 * except the GHASH is computed over the incoming CIPHERTEXT (and must
 * therefore be folded in BEFORE decryption overwrites/consumes it).
 * With OPENSSL_SMALL_FOOTPRINT it delegates to CRYPTO_gcm128_decrypt().
 *
 * NOTE(review): elided listing — else-branches and closing braces of the
 * original are not visible here.
 */
1521 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1522 const unsigned char *in, unsigned char *out,
1523 size_t len, ctr128_f stream)
1525 #if defined(OPENSSL_SMALL_FOOTPRINT)
1526 return CRYPTO_gcm128_decrypt(ctx, in, out, len);
/* Runtime endianness probe. */
1531 } is_endian = { 1 };
1532 unsigned int n, ctr;
1534 u64 mlen = ctx->len.u[1];
1535 void *key = ctx->key;
1536 # ifdef GCM_FUNCREF_4BIT
1537 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1539 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1540 const u8 *inp, size_t len) = ctx->ghash;
/* Same (2^36 - 32)-byte total-length / overflow guard as on encrypt. */
1545 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1547 ctx->len.u[1] = mlen;
1550 /* First call to decrypt finalizes GHASH(AAD) */
/* Load the 32-bit counter from the last 4 bytes of Yi. */
1555 if (is_endian.little)
1557 ctr = BSWAP4(ctx->Yi.d[3]);
1559 ctr = GETU32(ctx->Yi.c + 12);
/* Drain a partially consumed keystream block (c holds the ciphertext
 * byte, folded into Xi in the elided line above this one). */
1568 *(out++) = c ^ ctx->EKi.c[n];
1580 # if defined(GHASH) && defined(GHASH_CHUNK)
/* Bulk path: hash the ciphertext chunk FIRST, then CTR-decrypt it. */
1581 while (len >= GHASH_CHUNK) {
1582 GHASH(ctx, in, GHASH_CHUNK);
1583 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1584 ctr += GHASH_CHUNK / 16;
1585 if (is_endian.little)
1587 ctx->Yi.d[3] = BSWAP4(ctr);
1589 PUTU32(ctx->Yi.c + 12, ctr);
/* Remaining whole blocks (len rounded down to a multiple of 16). */
1598 if ((i = (len & (size_t)-16))) {
/* Non-GHASH build: fold each ciphertext block into Xi before decrypt. */
1606 for (k = 0; k < 16; ++k)
1607 ctx->Xi.c[k] ^= in[k];
1614 (*stream) (in, out, j, key, ctx->Yi.c);
1615 ctr += (unsigned int)j;
1616 if (is_endian.little)
1618 ctx->Yi.d[3] = BSWAP4(ctr);
1620 PUTU32(ctx->Yi.c + 12, ctr);
/* Final partial block: one more keystream block, bump counter, XOR tail. */
1629 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1631 if (is_endian.little)
1633 ctx->Yi.d[3] = BSWAP4(ctr);
1635 PUTU32(ctx->Yi.c + 12, ctr);
1642 out[n] = c ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_finish - complete GHASH over the length block
 * (bit-lengths of AAD and ciphertext), XOR in EK0 to form the tag in
 * ctx->Xi, and, if `tag` is non-NULL, compare the caller's tag.
 * Returns 0 on match, non-zero on mismatch (memcmp result).
 *
 * NOTE(review): elided listing — several statements/braces not visible.
 */
1652 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1658 } is_endian = { 1 };
/* Convert byte counts to bit counts, as required for the length block. */
1659 u64 alen = ctx->len.u[0] << 3;
1660 u64 clen = ctx->len.u[1] << 3;
1661 #ifdef GCM_FUNCREF_4BIT
1662 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
/* Flush any buffered partial-block state before finalizing. */
1665 if (ctx->mres || ctx->ares)
/* Byte-swap the lengths to big-endian on little-endian hosts (BSWAP8
 * path); the GETU32 path below is the fallback without BSWAP8. */
1668 if (is_endian.little) {
1670 alen = BSWAP8(alen);
1671 clen = BSWAP8(clen);
1675 ctx->len.u[0] = alen;
1676 ctx->len.u[1] = clen;
1678 alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1679 clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
/* Fold the 128-bit length block into the running GHASH state. */
1683 ctx->Xi.u[0] ^= alen;
1684 ctx->Xi.u[1] ^= clen;
/* Tag = GHASH result XOR E(K, Y0). */
1687 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1688 ctx->Xi.u[1] ^= ctx->EK0.u[1];
/* NOTE(review): memcmp is not constant-time; a timing-safe compare
 * (e.g. CRYPTO_memcmp) would be preferable for tag verification. */
1690 if (tag && len <= sizeof(ctx->Xi))
1691 return memcmp(ctx->Xi.c, tag, len);
/*
 * CRYPTO_gcm128_tag - finalize (without verifying) and copy out up to
 * sizeof(ctx->Xi.c) bytes of the authentication tag into `tag`.
 * `len` is clamped to the tag size; no error is reported on truncation.
 */
1696 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1698 CRYPTO_gcm128_finish(ctx, NULL, 0);
1699 memcpy(tag, ctx->Xi.c,
1700 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
/*
 * CRYPTO_gcm128_new - heap-allocate and initialize a GCM128_CONTEXT for
 * the given block cipher `block` keyed by `key`.  Initialization is
 * skipped when allocation fails (elided code presumably returns the
 * pointer, NULL on failure — TODO confirm against the full source).
 */
1703 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1705 GCM128_CONTEXT *ret;
1707 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1708 CRYPTO_gcm128_init(ret, key, block);
/*
 * CRYPTO_gcm128_release - scrub the context (key schedule, counters,
 * hash state) before the elided code frees it; cleansing prevents key
 * material lingering in freed heap memory.
 */
1713 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1716 OPENSSL_cleanse(ctx, sizeof(*ctx));
/*
 * Built-in self test: static known-answer vectors, presumably the
 * standard GCM test cases (K=key, P=plaintext, A=AAD, IV=nonce,
 * C=ciphertext, T=tag; the numeric suffix is the test-case number).
 * NOTE(review): elided listing — every array's closing "};" is missing
 * from this view.
 */
1721 #if defined(SELFTEST)
1723 # include <openssl/aes.h>
/* Test Case 1: 16-byte (AES-128) all-zero key, empty P/A, 12-byte IV. */
1726 static const u8 K1[16], *P1 = NULL, *A1 = NULL, IV1[12], *C1 = NULL;
1727 static const u8 T1[] = {
1728 0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, 0x30, 0x61,
1729 0x36, 0x7f, 0x1d, 0x57, 0xa4, 0xe7, 0x45, 0x5a
/* Test Case 2: zero key, one all-zero 16-byte plaintext block. */
1736 static const u8 P2[16];
1737 static const u8 C2[] = {
1738 0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92,
1739 0xf3, 0x28, 0xc2, 0xb9, 0x71, 0xb2, 0xfe, 0x78
1742 static const u8 T2[] = {
1743 0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec, 0x13, 0xbd,
1744 0xf5, 0x3a, 0x67, 0xb2, 0x12, 0x57, 0xbd, 0xdf
/* Test Case 3: AES-128 key, 64-byte plaintext, no AAD. */
1749 static const u8 K3[] = {
1750 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1751 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
1754 static const u8 P3[] = {
1755 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1756 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1757 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1758 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1759 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1760 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1761 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1762 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1765 static const u8 IV3[] = {
1766 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1767 0xde, 0xca, 0xf8, 0x88
1770 static const u8 C3[] = {
1771 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1772 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1773 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1774 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1775 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1776 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1777 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1778 0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85
1781 static const u8 T3[] = {
1782 0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd, 0x64, 0xa6,
1783 0x2c, 0xf3, 0x5a, 0xbd, 0x2b, 0xa6, 0xfa, 0xb4
/* Test Case 4: same key as 3, 60-byte plaintext plus 20-byte AAD. */
1789 static const u8 P4[] = {
1790 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1791 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1792 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1793 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1794 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1795 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1796 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1797 0xba, 0x63, 0x7b, 0x39
1800 static const u8 A4[] = {
1801 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1802 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1803 0xab, 0xad, 0xda, 0xd2
1806 static const u8 C4[] = {
1807 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1808 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1809 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1810 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1811 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1812 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1813 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1814 0x3d, 0x58, 0xe0, 0x91
1817 static const u8 T4[] = {
1818 0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21, 0xa5, 0xdb,
1819 0x94, 0xfa, 0xe9, 0x5a, 0xe7, 0x12, 0x1a, 0x47
/* Test Case 5: short (8-byte) IV variant. */
1826 static const u8 IV5[] = {
1827 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad
1830 static const u8 C5[] = {
1831 0x61, 0x35, 0x3b, 0x4c, 0x28, 0x06, 0x93, 0x4a,
1832 0x77, 0x7f, 0xf5, 0x1f, 0xa2, 0x2a, 0x47, 0x55,
1833 0x69, 0x9b, 0x2a, 0x71, 0x4f, 0xcd, 0xc6, 0xf8,
1834 0x37, 0x66, 0xe5, 0xf9, 0x7b, 0x6c, 0x74, 0x23,
1835 0x73, 0x80, 0x69, 0x00, 0xe4, 0x9f, 0x24, 0xb2,
1836 0x2b, 0x09, 0x75, 0x44, 0xd4, 0x89, 0x6b, 0x42,
1837 0x49, 0x89, 0xb5, 0xe1, 0xeb, 0xac, 0x0f, 0x07,
1838 0xc2, 0x3f, 0x45, 0x98
1841 static const u8 T5[] = {
1842 0x36, 0x12, 0xd2, 0xe7, 0x9e, 0x3b, 0x07, 0x85,
1843 0x56, 0x1b, 0xe1, 0x4a, 0xac, 0xa2, 0xfc, 0xcb
/* Test Case 6: long (60-byte) IV variant. */
1850 static const u8 IV6[] = {
1851 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
1852 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
1853 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
1854 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
1855 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
1856 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
1857 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
1858 0xa6, 0x37, 0xb3, 0x9b
1861 static const u8 C6[] = {
1862 0x8c, 0xe2, 0x49, 0x98, 0x62, 0x56, 0x15, 0xb6,
1863 0x03, 0xa0, 0x33, 0xac, 0xa1, 0x3f, 0xb8, 0x94,
1864 0xbe, 0x91, 0x12, 0xa5, 0xc3, 0xa2, 0x11, 0xa8,
1865 0xba, 0x26, 0x2a, 0x3c, 0xca, 0x7e, 0x2c, 0xa7,
1866 0x01, 0xe4, 0xa9, 0xa4, 0xfb, 0xa4, 0x3c, 0x90,
1867 0xcc, 0xdc, 0xb2, 0x81, 0xd4, 0x8c, 0x7c, 0x6f,
1868 0xd6, 0x28, 0x75, 0xd2, 0xac, 0xa4, 0x17, 0x03,
1869 0x4c, 0x34, 0xae, 0xe5
1872 static const u8 T6[] = {
1873 0x61, 0x9c, 0xc5, 0xae, 0xff, 0xfe, 0x0b, 0xfa,
1874 0x46, 0x2a, 0xf4, 0x3c, 0x16, 0x99, 0xd0, 0x50
/* Test Case 7: 24-byte (AES-192) all-zero key, empty P/A. */
1878 static const u8 K7[24], *P7 = NULL, *A7 = NULL, IV7[12], *C7 = NULL;
1879 static const u8 T7[] = {
1880 0xcd, 0x33, 0xb2, 0x8a, 0xc7, 0x73, 0xf7, 0x4b,
1881 0xa0, 0x0e, 0xd1, 0xf3, 0x12, 0x57, 0x24, 0x35
/* Test Case 8: AES-192 zero key, one all-zero plaintext block. */
1888 static const u8 P8[16];
1889 static const u8 C8[] = {
1890 0x98, 0xe7, 0x24, 0x7c, 0x07, 0xf0, 0xfe, 0x41,
1891 0x1c, 0x26, 0x7e, 0x43, 0x84, 0xb0, 0xf6, 0x00
1894 static const u8 T8[] = {
1895 0x2f, 0xf5, 0x8d, 0x80, 0x03, 0x39, 0x27, 0xab,
1896 0x8e, 0xf4, 0xd4, 0x58, 0x75, 0x14, 0xf0, 0xfb
/* Test Case 9: AES-192 key, 64-byte plaintext. */
1901 static const u8 K9[] = {
1902 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1903 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
1904 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c
1907 static const u8 P9[] = {
1908 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1909 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1910 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1911 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1912 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1913 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1914 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1915 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1918 static const u8 IV9[] = {
1919 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1920 0xde, 0xca, 0xf8, 0x88
1923 static const u8 C9[] = {
1924 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1925 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1926 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1927 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1928 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1929 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1930 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1931 0xcc, 0xda, 0x27, 0x10, 0xac, 0xad, 0xe2, 0x56
1934 static const u8 T9[] = {
1935 0x99, 0x24, 0xa7, 0xc8, 0x58, 0x73, 0x36, 0xbf,
1936 0xb1, 0x18, 0x02, 0x4d, 0xb8, 0x67, 0x4a, 0x14
/* Test Case 10: AES-192, 60-byte plaintext plus AAD. */
1942 static const u8 P10[] = {
1943 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1944 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1945 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1946 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1947 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1948 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1949 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1950 0xba, 0x63, 0x7b, 0x39
1953 static const u8 A10[] = {
1954 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1955 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1956 0xab, 0xad, 0xda, 0xd2
1959 static const u8 C10[] = {
1960 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1961 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1962 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1963 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1964 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1965 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1966 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1967 0xcc, 0xda, 0x27, 0x10
1970 static const u8 T10[] = {
1971 0x25, 0x19, 0x49, 0x8e, 0x80, 0xf1, 0x47, 0x8f,
1972 0x37, 0xba, 0x55, 0xbd, 0x6d, 0x27, 0x61, 0x8c
/* Test Case 11: AES-192, short 8-byte IV. */
1979 static const u8 IV11[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
1981 static const u8 C11[] = {
1982 0x0f, 0x10, 0xf5, 0x99, 0xae, 0x14, 0xa1, 0x54,
1983 0xed, 0x24, 0xb3, 0x6e, 0x25, 0x32, 0x4d, 0xb8,
1984 0xc5, 0x66, 0x63, 0x2e, 0xf2, 0xbb, 0xb3, 0x4f,
1985 0x83, 0x47, 0x28, 0x0f, 0xc4, 0x50, 0x70, 0x57,
1986 0xfd, 0xdc, 0x29, 0xdf, 0x9a, 0x47, 0x1f, 0x75,
1987 0xc6, 0x65, 0x41, 0xd4, 0xd4, 0xda, 0xd1, 0xc9,
1988 0xe9, 0x3a, 0x19, 0xa5, 0x8e, 0x8b, 0x47, 0x3f,
1989 0xa0, 0xf0, 0x62, 0xf7
1992 static const u8 T11[] = {
1993 0x65, 0xdc, 0xc5, 0x7f, 0xcf, 0x62, 0x3a, 0x24,
1994 0x09, 0x4f, 0xcc, 0xa4, 0x0d, 0x35, 0x33, 0xf8
/* Test Case 12: AES-192, long 60-byte IV. */
2001 static const u8 IV12[] = {
2002 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2003 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2004 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2005 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2006 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2007 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2008 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2009 0xa6, 0x37, 0xb3, 0x9b
2012 static const u8 C12[] = {
2013 0xd2, 0x7e, 0x88, 0x68, 0x1c, 0xe3, 0x24, 0x3c,
2014 0x48, 0x30, 0x16, 0x5a, 0x8f, 0xdc, 0xf9, 0xff,
2015 0x1d, 0xe9, 0xa1, 0xd8, 0xe6, 0xb4, 0x47, 0xef,
2016 0x6e, 0xf7, 0xb7, 0x98, 0x28, 0x66, 0x6e, 0x45,
2017 0x81, 0xe7, 0x90, 0x12, 0xaf, 0x34, 0xdd, 0xd9,
2018 0xe2, 0xf0, 0x37, 0x58, 0x9b, 0x29, 0x2d, 0xb3,
2019 0xe6, 0x7c, 0x03, 0x67, 0x45, 0xfa, 0x22, 0xe7,
2020 0xe9, 0xb7, 0x37, 0x3b
2023 static const u8 T12[] = {
2024 0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb,
2025 0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9
/* Test Case 13: 32-byte (AES-256) all-zero key, empty P/A. */
2029 static const u8 K13[32], *P13 = NULL, *A13 = NULL, IV13[12], *C13 = NULL;
2030 static const u8 T13[] = {
2031 0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9,
2032 0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b
/* Test Case 14: AES-256 zero key, one all-zero plaintext block. */
2038 static const u8 P14[16], IV14[12];
2039 static const u8 C14[] = {
2040 0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, 0x6b, 0x6e,
2041 0x07, 0x4e, 0xc5, 0xd3, 0xba, 0xf3, 0x9d, 0x18
2044 static const u8 T14[] = {
2045 0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99, 0x6b, 0xf0,
2046 0x26, 0x5b, 0x98, 0xb5, 0xd4, 0x8a, 0xb9, 0x19
/* Test Case 15: AES-256 key, 64-byte plaintext. */
2051 static const u8 K15[] = {
2052 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2053 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
2054 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2055 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
2058 static const u8 P15[] = {
2059 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2060 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2061 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2062 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2063 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2064 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2065 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2066 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
2069 static const u8 IV15[] = {
2070 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
2071 0xde, 0xca, 0xf8, 0x88
2074 static const u8 C15[] = {
2075 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2076 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2077 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2078 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2079 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2080 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2081 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2082 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2085 static const u8 T15[] = {
2086 0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34, 0x71, 0xbd,
2087 0xec, 0x1a, 0x50, 0x22, 0x70, 0xe3, 0xcc, 0x6c
/* Test Case 16: AES-256, 60-byte plaintext plus AAD. */
2093 static const u8 P16[] = {
2094 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2095 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2096 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2097 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2098 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2099 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2100 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2101 0xba, 0x63, 0x7b, 0x39
2104 static const u8 A16[] = {
2105 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2106 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2107 0xab, 0xad, 0xda, 0xd2
2110 static const u8 C16[] = {
2111 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2112 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2113 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2114 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2115 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2116 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2117 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2118 0xbc, 0xc9, 0xf6, 0x62
2121 static const u8 T16[] = {
2122 0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68,
2123 0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
/* Test Case 17: AES-256, short 8-byte IV. */
2130 static const u8 IV17[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
2132 static const u8 C17[] = {
2133 0xc3, 0x76, 0x2d, 0xf1, 0xca, 0x78, 0x7d, 0x32,
2134 0xae, 0x47, 0xc1, 0x3b, 0xf1, 0x98, 0x44, 0xcb,
2135 0xaf, 0x1a, 0xe1, 0x4d, 0x0b, 0x97, 0x6a, 0xfa,
2136 0xc5, 0x2f, 0xf7, 0xd7, 0x9b, 0xba, 0x9d, 0xe0,
2137 0xfe, 0xb5, 0x82, 0xd3, 0x39, 0x34, 0xa4, 0xf0,
2138 0x95, 0x4c, 0xc2, 0x36, 0x3b, 0xc7, 0x3f, 0x78,
2139 0x62, 0xac, 0x43, 0x0e, 0x64, 0xab, 0xe4, 0x99,
2140 0xf4, 0x7c, 0x9b, 0x1f
2143 static const u8 T17[] = {
2144 0x3a, 0x33, 0x7d, 0xbf, 0x46, 0xa7, 0x92, 0xc4,
2145 0x5e, 0x45, 0x49, 0x13, 0xfe, 0x2e, 0xa8, 0xf2
/* Test Case 18: AES-256, long 60-byte IV. */
2152 static const u8 IV18[] = {
2153 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2154 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2155 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2156 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2157 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2158 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2159 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2160 0xa6, 0x37, 0xb3, 0x9b
2163 static const u8 C18[] = {
2164 0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1,
2165 0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20,
2166 0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19,
2167 0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4,
2168 0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45,
2169 0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde,
2170 0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e,
2171 0x44, 0xae, 0x7e, 0x3f
2174 static const u8 T18[] = {
2175 0xa4, 0x4a, 0x82, 0x66, 0xee, 0x1c, 0x8e, 0xb0,
2176 0xc8, 0xb5, 0xd4, 0xcf, 0x5a, 0xe9, 0xf1, 0x9a
/* Test Case 19: AAD-only (128-byte AAD, no plaintext). */
2184 static const u8 A19[] = {
2185 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2186 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2187 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2188 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2189 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2190 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2191 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2192 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55,
2193 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2194 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2195 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2196 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2197 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2198 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2199 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2200 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2203 static const u8 T19[] = {
2204 0x5f, 0xea, 0x79, 0x3a, 0x2d, 0x6f, 0x97, 0x4d,
2205 0x37, 0xe6, 0x8e, 0x0c, 0xb8, 0xff, 0x94, 0x92
/* Test Case 20: exercises 32-bit counter wrap (counter LSB = 0xff). */
2211 /* this results in 0xff in counter LSB */
2212 static const u8 IV20[64] = { 0xff, 0xff, 0xff, 0xff };
2214 static const u8 P20[288];
2215 static const u8 C20[] = {
2216 0x56, 0xb3, 0x37, 0x3c, 0xa9, 0xef, 0x6e, 0x4a,
2217 0x2b, 0x64, 0xfe, 0x1e, 0x9a, 0x17, 0xb6, 0x14,
2218 0x25, 0xf1, 0x0d, 0x47, 0xa7, 0x5a, 0x5f, 0xce,
2219 0x13, 0xef, 0xc6, 0xbc, 0x78, 0x4a, 0xf2, 0x4f,
2220 0x41, 0x41, 0xbd, 0xd4, 0x8c, 0xf7, 0xc7, 0x70,
2221 0x88, 0x7a, 0xfd, 0x57, 0x3c, 0xca, 0x54, 0x18,
2222 0xa9, 0xae, 0xff, 0xcd, 0x7c, 0x5c, 0xed, 0xdf,
2223 0xc6, 0xa7, 0x83, 0x97, 0xb9, 0xa8, 0x5b, 0x49,
2224 0x9d, 0xa5, 0x58, 0x25, 0x72, 0x67, 0xca, 0xab,
2225 0x2a, 0xd0, 0xb2, 0x3c, 0xa4, 0x76, 0xa5, 0x3c,
2226 0xb1, 0x7f, 0xb4, 0x1c, 0x4b, 0x8b, 0x47, 0x5c,
2227 0xb4, 0xf3, 0xf7, 0x16, 0x50, 0x94, 0xc2, 0x29,
2228 0xc9, 0xe8, 0xc4, 0xdc, 0x0a, 0x2a, 0x5f, 0xf1,
2229 0x90, 0x3e, 0x50, 0x15, 0x11, 0x22, 0x13, 0x76,
2230 0xa1, 0xcd, 0xb8, 0x36, 0x4c, 0x50, 0x61, 0xa2,
2231 0x0c, 0xae, 0x74, 0xbc, 0x4a, 0xcd, 0x76, 0xce,
2232 0xb0, 0xab, 0xc9, 0xfd, 0x32, 0x17, 0xef, 0x9f,
2233 0x8c, 0x90, 0xbe, 0x40, 0x2d, 0xdf, 0x6d, 0x86,
2234 0x97, 0xf4, 0xf8, 0x80, 0xdf, 0xf1, 0x5b, 0xfb,
2235 0x7a, 0x6b, 0x28, 0x24, 0x1e, 0xc8, 0xfe, 0x18,
2236 0x3c, 0x2d, 0x59, 0xe3, 0xf9, 0xdf, 0xff, 0x65,
2237 0x3c, 0x71, 0x26, 0xf0, 0xac, 0xb9, 0xe6, 0x42,
2238 0x11, 0xf4, 0x2b, 0xae, 0x12, 0xaf, 0x46, 0x2b,
2239 0x10, 0x70, 0xbe, 0xf1, 0xab, 0x5e, 0x36, 0x06,
2240 0x87, 0x2c, 0xa1, 0x0d, 0xee, 0x15, 0xb3, 0x24,
2241 0x9b, 0x1a, 0x1b, 0x95, 0x8f, 0x23, 0x13, 0x4c,
2242 0x4b, 0xcc, 0xb7, 0xd0, 0x32, 0x00, 0xbc, 0xe4,
2243 0x20, 0xa2, 0xf8, 0xeb, 0x66, 0xdc, 0xf3, 0x64,
2244 0x4d, 0x14, 0x23, 0xc1, 0xb5, 0x69, 0x90, 0x03,
2245 0xc1, 0x3e, 0xce, 0xf4, 0xbf, 0x38, 0xa3, 0xb6,
2246 0x0e, 0xed, 0xc3, 0x40, 0x33, 0xba, 0xc1, 0x90,
2247 0x27, 0x83, 0xdc, 0x6d, 0x89, 0xe2, 0xe7, 0x74,
2248 0x18, 0x8a, 0x43, 0x9c, 0x7e, 0xbc, 0xc0, 0x67,
2249 0x2d, 0xbd, 0xa4, 0xdd, 0xcf, 0xb2, 0x79, 0x46,
2250 0x13, 0xb0, 0xbe, 0x41, 0x31, 0x5e, 0xf7, 0x78,
2251 0x70, 0x8a, 0x70, 0xee, 0x7d, 0x75, 0x16, 0x5c
2254 static const u8 T20[] = {
2255 0x8b, 0x30, 0x7f, 0x6b, 0x33, 0x28, 0x6d, 0x0a,
2256 0xb0, 0x26, 0xa9, 0xed, 0x3f, 0xe1, 0xe8, 0x5f
/*
 * TEST_CASE(n) - run known-answer test case n in both directions:
 * encrypt P##n and check ciphertext C##n and tag T##n via
 * CRYPTO_gcm128_finish(), then decrypt C##n and check the recovered
 * plaintext against P##n.  NULL P/A/C pointers mean "absent" for that
 * test case; `ret` and `key`/`ctx` come from the enclosing (elided)
 * main().  Comments cannot be inserted between the continued lines of
 * this macro without adding continuations, so they are kept above it.
 */
2259 # define TEST_CASE(n) do { \
2260 u8 out[sizeof(P##n)]; \
2261 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
2262 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
2263 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
2264 memset(out,0,sizeof(out)); \
2265 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
2266 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
2267 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
2268 (C##n && memcmp(out,C##n,sizeof(out)))) \
2269 ret++, printf ("encrypt test#%d failed.\n",n); \
2270 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
2271 memset(out,0,sizeof(out)); \
2272 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
2273 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
2274 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
2275 (P##n && memcmp(out,P##n,sizeof(out)))) \
2276 ret++, printf ("decrypt test#%d failed.\n",n); \
2306 # ifdef OPENSSL_CPUID_OBJ
2308 size_t start, stop, gcm_t, ctr_t, OPENSSL_rdtsc();
2315 AES_set_encrypt_key(K1, sizeof(K1) * 8, &key);
2316 CRYPTO_gcm128_init(&ctx, &key, (block128_f) AES_encrypt);
2317 CRYPTO_gcm128_setiv(&ctx, IV1, sizeof(IV1));
2319 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2320 start = OPENSSL_rdtsc();
2321 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2322 gcm_t = OPENSSL_rdtsc() - start;
2324 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2325 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2326 (block128_f) AES_encrypt);
2327 start = OPENSSL_rdtsc();
2328 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2329 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2330 (block128_f) AES_encrypt);
2331 ctr_t = OPENSSL_rdtsc() - start;
2333 printf("%.2f-%.2f=%.2f\n",
2334 gcm_t / (double)sizeof(buf),
2335 ctr_t / (double)sizeof(buf),
2336 (gcm_t - ctr_t) / (double)sizeof(buf));
2339 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
2340 const u8 *inp, size_t len) = ctx.ghash;
2342 GHASH((&ctx), buf.c, sizeof(buf));
2343 start = OPENSSL_rdtsc();
2344 for (i = 0; i < 100; ++i)
2345 GHASH((&ctx), buf.c, sizeof(buf));
2346 gcm_t = OPENSSL_rdtsc() - start;
2347 printf("%.2f\n", gcm_t / (double)sizeof(buf) / (double)i);