1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
50 #include <openssl/crypto.h>
51 #include "modes_lcl.h"
61 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
62 /* redefine, because alignment is ensured */
64 # define GETU32(p) BSWAP4(*(const u32 *)(p))
66 # define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
69 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
70 #define REDUCE1BIT(V) do { \
71 if (sizeof(size_t)==8) { \
72 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
73 V.lo = (V.hi<<63)|(V.lo>>1); \
74 V.hi = (V.hi>>1 )^T; \
77 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
78 V.lo = (V.hi<<63)|(V.lo>>1); \
79 V.hi = (V.hi>>1 )^((u64)T<<32); \
84 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
85 * never be set to 8. 8 is effectively reserved for testing purposes.
86 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
87 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
88 * whole spectrum of possible table driven implementations. Why? In
89 * non-"Shoup's" case memory access pattern is segmented in such manner,
90 * that it's trivial to see that cache timing information can reveal
91 * fair portion of intermediate hash value. Given that ciphertext is
92 * always available to attacker, it's possible for him to attempt to
93 * deduce secret parameter H and if successful, tamper with messages
94 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
95 * not as trivial, but there is no reason to believe that it's resistant
96 * to cache-timing attack. And the thing about "8-bit" implementation is
97 * that it consumes 16 (sixteen) times more memory, 4KB per individual
98 * key + 1KB shared. Well, on pros side it should be twice as fast as
99 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
100 * was observed to run ~75% faster, closer to 100% for commercial
101 * compilers... Yet "4-bit" procedure is preferred, because it's
102 * believed to provide better security-performance balance and adequate
103 * all-round performance. "All-round" refers to things like:
105 * - shorter setup time effectively improves overall timing for
106 * handling short messages;
107 * - larger table allocation can become unbearable because of VM
108 * subsystem penalties (for example on Windows large enough free
109 * results in VM working set trimming, meaning that consequent
110 * malloc would immediately incur working set expansion);
111 * - larger table has larger cache footprint, which can affect
112 * performance of other code paths (not necessarily even from same
113 * thread in Hyper-Threading world);
115 * Value of 1 is not appropriate for performance reasons.
119 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
129 for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
134 for (i = 2; i < 256; i <<= 1) {
135 u128 *Hi = Htable + i, H0 = *Hi;
136 for (j = 1; j < i; ++j) {
137 Hi[j].hi = H0.hi ^ Htable[j].hi;
138 Hi[j].lo = H0.lo ^ Htable[j].lo;
143 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
146 const u8 *xi = (const u8 *)Xi + 15;
152 static const size_t rem_8bit[256] = {
153 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
154 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
155 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
156 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
157 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
158 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
159 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
160 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
161 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
162 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
163 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
164 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
165 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
166 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
167 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
168 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
169 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
170 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
171 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
172 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
173 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
174 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
175 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
176 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
177 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
178 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
179 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
180 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
181 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
182 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
183 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
184 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
185 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
186 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
187 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
188 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
189 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
190 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
191 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
192 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
193 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
194 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
195 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
196 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
197 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
198 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
199 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
200 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
201 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
202 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
203 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
204 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
205 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
206 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
207 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
208 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
209 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
210 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
211 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
212 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
213 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
214 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
215 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
216 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
220 Z.hi ^= Htable[n].hi;
221 Z.lo ^= Htable[n].lo;
228 rem = (size_t)Z.lo & 0xff;
229 Z.lo = (Z.hi << 56) | (Z.lo >> 8);
231 if (sizeof(size_t) == 8)
232 Z.hi ^= rem_8bit[rem];
234 Z.hi ^= (u64)rem_8bit[rem] << 32;
237 if (is_endian.little) {
239 Xi[0] = BSWAP8(Z.hi);
240 Xi[1] = BSWAP8(Z.lo);
244 v = (u32)(Z.hi >> 32);
248 v = (u32)(Z.lo >> 32);
259 # define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
263 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
266 # if defined(OPENSSL_SMALL_FOOTPRINT)
275 # if defined(OPENSSL_SMALL_FOOTPRINT)
276 for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
281 for (i = 2; i < 16; i <<= 1) {
282 u128 *Hi = Htable + i;
284 for (V = *Hi, j = 1; j < i; ++j) {
285 Hi[j].hi = V.hi ^ Htable[j].hi;
286 Hi[j].lo = V.lo ^ Htable[j].lo;
297 Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
299 Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
300 Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
301 Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
303 Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
304 Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
305 Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
306 Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
307 Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
308 Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
309 Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
311 # if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
313 * ARM assembler expects specific dword order in Htable.
322 if (is_endian.little)
323 for (j = 0; j < 16; ++j) {
328 for (j = 0; j < 16; ++j) {
330 Htable[j].hi = V.lo << 32 | V.lo >> 32;
331 Htable[j].lo = V.hi << 32 | V.hi >> 32;
338 static const size_t rem_4bit[16] = {
339 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
340 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
341 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
342 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
345 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
349 size_t rem, nlo, nhi;
355 nlo = ((const u8 *)Xi)[15];
359 Z.hi = Htable[nlo].hi;
360 Z.lo = Htable[nlo].lo;
363 rem = (size_t)Z.lo & 0xf;
364 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
366 if (sizeof(size_t) == 8)
367 Z.hi ^= rem_4bit[rem];
369 Z.hi ^= (u64)rem_4bit[rem] << 32;
371 Z.hi ^= Htable[nhi].hi;
372 Z.lo ^= Htable[nhi].lo;
377 nlo = ((const u8 *)Xi)[cnt];
381 rem = (size_t)Z.lo & 0xf;
382 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
384 if (sizeof(size_t) == 8)
385 Z.hi ^= rem_4bit[rem];
387 Z.hi ^= (u64)rem_4bit[rem] << 32;
389 Z.hi ^= Htable[nlo].hi;
390 Z.lo ^= Htable[nlo].lo;
393 if (is_endian.little) {
395 Xi[0] = BSWAP8(Z.hi);
396 Xi[1] = BSWAP8(Z.lo);
400 v = (u32)(Z.hi >> 32);
404 v = (u32)(Z.lo >> 32);
415 # if !defined(OPENSSL_SMALL_FOOTPRINT)
417 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
418 * details... Compiler-generated code doesn't seem to give any
419 * performance improvement, at least not on x86[_64]. It's here
420 * mostly as reference and a placeholder for possible future
421 * non-trivial optimization[s]...
423 static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
424 const u8 *inp, size_t len)
428 size_t rem, nlo, nhi;
437 nlo = ((const u8 *)Xi)[15];
442 Z.hi = Htable[nlo].hi;
443 Z.lo = Htable[nlo].lo;
446 rem = (size_t)Z.lo & 0xf;
447 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
449 if (sizeof(size_t) == 8)
450 Z.hi ^= rem_4bit[rem];
452 Z.hi ^= (u64)rem_4bit[rem] << 32;
454 Z.hi ^= Htable[nhi].hi;
455 Z.lo ^= Htable[nhi].lo;
460 nlo = ((const u8 *)Xi)[cnt];
465 rem = (size_t)Z.lo & 0xf;
466 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
468 if (sizeof(size_t) == 8)
469 Z.hi ^= rem_4bit[rem];
471 Z.hi ^= (u64)rem_4bit[rem] << 32;
473 Z.hi ^= Htable[nlo].hi;
474 Z.lo ^= Htable[nlo].lo;
478 * Extra 256+16 bytes per-key plus 512 bytes shared tables
479 * [should] give ~50% improvement... One could have PACK()-ed
480 * the rem_8bit even here, but the priority is to minimize
483 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
484 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
485 static const unsigned short rem_8bit[256] = {
486 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
487 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
488 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
489 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
490 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
491 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
492 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
493 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
494 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
495 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
496 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
497 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
498 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
499 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
500 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
501 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
502 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
503 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
504 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
505 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
506 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
507 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
508 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
509 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
510 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
511 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
512 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
513 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
514 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
515 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
516 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
517 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
520 * This pre-processing phase slows down procedure by approximately
521 * same time as it makes each loop spin faster. In other words
522 * single block performance is approximately same as straightforward
523 * "4-bit" implementation, and then it goes only faster...
525 for (cnt = 0; cnt < 16; ++cnt) {
526 Z.hi = Htable[cnt].hi;
527 Z.lo = Htable[cnt].lo;
528 Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
529 Hshr4[cnt].hi = (Z.hi >> 4);
530 Hshl4[cnt] = (u8)(Z.lo << 4);
534 for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
535 nlo = ((const u8 *)Xi)[cnt];
540 Z.hi ^= Htable[nlo].hi;
541 Z.lo ^= Htable[nlo].lo;
543 rem = (size_t)Z.lo & 0xff;
545 Z.lo = (Z.hi << 56) | (Z.lo >> 8);
548 Z.hi ^= Hshr4[nhi].hi;
549 Z.lo ^= Hshr4[nhi].lo;
550 Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
553 nlo = ((const u8 *)Xi)[0];
558 Z.hi ^= Htable[nlo].hi;
559 Z.lo ^= Htable[nlo].lo;
561 rem = (size_t)Z.lo & 0xf;
563 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
566 Z.hi ^= Htable[nhi].hi;
567 Z.lo ^= Htable[nhi].lo;
568 Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
571 if (is_endian.little) {
573 Xi[0] = BSWAP8(Z.hi);
574 Xi[1] = BSWAP8(Z.lo);
578 v = (u32)(Z.hi >> 32);
582 v = (u32)(Z.lo >> 32);
591 } while (inp += 16, len -= 16);
595 void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
596 void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
600 # define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
601 # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
602 # define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
604 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing
605 * effect. In other words idea is to hash data while it's still in L1 cache
606 * after encryption pass...
608 # define GHASH_CHUNK (3*1024)
611 #else /* TABLE_BITS */
613 static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
615 u128 V, Z = { 0, 0 };
618 const long *xi = (const long *)Xi;
624 V.hi = H[0]; /* H is in host byte order, no byte swapping */
627 for (j = 0; j < 16 / sizeof(long); ++j) {
628 if (is_endian.little) {
629 if (sizeof(long) == 8) {
631 X = (long)(BSWAP8(xi[j]));
633 const u8 *p = (const u8 *)(xi + j);
634 X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
637 const u8 *p = (const u8 *)(xi + j);
643 for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
644 u64 M = (u64)(X >> (8 * sizeof(long) - 1));
652 if (is_endian.little) {
654 Xi[0] = BSWAP8(Z.hi);
655 Xi[1] = BSWAP8(Z.lo);
659 v = (u32)(Z.hi >> 32);
663 v = (u32)(Z.lo >> 32);
674 # define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
678 #if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
679 # if !defined(I386_ONLY) && \
680 (defined(__i386) || defined(__i386__) || \
681 defined(__x86_64) || defined(__x86_64__) || \
682 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
683 # define GHASH_ASM_X86_OR_64
684 # define GCM_FUNCREF_4BIT
685 extern unsigned int OPENSSL_ia32cap_P[2];
687 void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
688 void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
689 void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
692 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
693 # define gcm_init_avx gcm_init_clmul
694 # define gcm_gmult_avx gcm_gmult_clmul
695 # define gcm_ghash_avx gcm_ghash_clmul
697 void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
698 void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
699 void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
703 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
704 # define GHASH_ASM_X86
705 void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
706 void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
709 void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
710 void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
713 # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
714 # include "arm_arch.h"
715 # if __ARM_MAX_ARCH__>=7
716 # define GHASH_ASM_ARM
717 # define GCM_FUNCREF_4BIT
718 # define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
719 # if defined(__arm__) || defined(__arm)
720 # define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
722 void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
723 void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
724 void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
726 void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
727 void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
728 void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
731 # elif defined(__sparc__) || defined(__sparc)
732 # include "sparc_arch.h"
733 # define GHASH_ASM_SPARC
734 # define GCM_FUNCREF_4BIT
735 extern unsigned int OPENSSL_sparcv9cap_P[];
736 void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
737 void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
738 void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
740 # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
741 # include "ppc_arch.h"
742 # define GHASH_ASM_PPC
743 # define GCM_FUNCREF_4BIT
744 void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
745 void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
746 void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
751 #ifdef GCM_FUNCREF_4BIT
753 # define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
756 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
760 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
767 memset(ctx, 0, sizeof(*ctx));
771 (*block) (ctx->H.c, ctx->H.c, key);
773 if (is_endian.little) {
774 /* H is stored in host byte order */
776 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
777 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
781 hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
782 lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
788 gcm_init_8bit(ctx->Htable, ctx->H.u);
791 # define CTX__GHASH(f) (ctx->ghash = (f))
793 # define CTX__GHASH(f) (ctx->ghash = NULL)
795 # if defined(GHASH_ASM_X86_OR_64)
796 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
797 if (OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
798 OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
799 if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
800 gcm_init_avx(ctx->Htable, ctx->H.u);
801 ctx->gmult = gcm_gmult_avx;
802 CTX__GHASH(gcm_ghash_avx);
804 gcm_init_clmul(ctx->Htable, ctx->H.u);
805 ctx->gmult = gcm_gmult_clmul;
806 CTX__GHASH(gcm_ghash_clmul);
811 gcm_init_4bit(ctx->Htable, ctx->H.u);
812 # if defined(GHASH_ASM_X86) /* x86 only */
813 # if defined(OPENSSL_IA32_SSE2)
814 if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
816 if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
818 ctx->gmult = gcm_gmult_4bit_mmx;
819 CTX__GHASH(gcm_ghash_4bit_mmx);
821 ctx->gmult = gcm_gmult_4bit_x86;
822 CTX__GHASH(gcm_ghash_4bit_x86);
825 ctx->gmult = gcm_gmult_4bit;
826 CTX__GHASH(gcm_ghash_4bit);
828 # elif defined(GHASH_ASM_ARM)
829 # ifdef PMULL_CAPABLE
831 gcm_init_v8(ctx->Htable, ctx->H.u);
832 ctx->gmult = gcm_gmult_v8;
833 CTX__GHASH(gcm_ghash_v8);
838 gcm_init_neon(ctx->Htable, ctx->H.u);
839 ctx->gmult = gcm_gmult_neon;
840 CTX__GHASH(gcm_ghash_neon);
844 gcm_init_4bit(ctx->Htable, ctx->H.u);
845 ctx->gmult = gcm_gmult_4bit;
846 CTX__GHASH(gcm_ghash_4bit);
848 # elif defined(GHASH_ASM_SPARC)
849 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
850 gcm_init_vis3(ctx->Htable, ctx->H.u);
851 ctx->gmult = gcm_gmult_vis3;
852 CTX__GHASH(gcm_ghash_vis3);
854 gcm_init_4bit(ctx->Htable, ctx->H.u);
855 ctx->gmult = gcm_gmult_4bit;
856 CTX__GHASH(gcm_ghash_4bit);
858 # elif defined(GHASH_ASM_PPC)
859 if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
860 gcm_init_p8(ctx->Htable, ctx->H.u);
861 ctx->gmult = gcm_gmult_p8;
862 CTX__GHASH(gcm_ghash_p8);
864 gcm_init_4bit(ctx->Htable, ctx->H.u);
865 ctx->gmult = gcm_gmult_4bit;
866 CTX__GHASH(gcm_ghash_4bit);
869 gcm_init_4bit(ctx->Htable, ctx->H.u);
875 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
883 #ifdef GCM_FUNCREF_4BIT
884 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
891 ctx->len.u[0] = 0; /* AAD length */
892 ctx->len.u[1] = 0; /* message length */
897 memcpy(ctx->Yi.c, iv, 12);
905 for (i = 0; i < 16; ++i)
906 ctx->Yi.c[i] ^= iv[i];
912 for (i = 0; i < len; ++i)
913 ctx->Yi.c[i] ^= iv[i];
917 if (is_endian.little) {
919 ctx->Yi.u[1] ^= BSWAP8(len0);
921 ctx->Yi.c[8] ^= (u8)(len0 >> 56);
922 ctx->Yi.c[9] ^= (u8)(len0 >> 48);
923 ctx->Yi.c[10] ^= (u8)(len0 >> 40);
924 ctx->Yi.c[11] ^= (u8)(len0 >> 32);
925 ctx->Yi.c[12] ^= (u8)(len0 >> 24);
926 ctx->Yi.c[13] ^= (u8)(len0 >> 16);
927 ctx->Yi.c[14] ^= (u8)(len0 >> 8);
928 ctx->Yi.c[15] ^= (u8)(len0);
931 ctx->Yi.u[1] ^= len0;
935 if (is_endian.little)
937 ctr = BSWAP4(ctx->Yi.d[3]);
939 ctr = GETU32(ctx->Yi.c + 12);
945 (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
947 if (is_endian.little)
949 ctx->Yi.d[3] = BSWAP4(ctr);
951 PUTU32(ctx->Yi.c + 12, ctr);
957 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
962 u64 alen = ctx->len.u[0];
963 #ifdef GCM_FUNCREF_4BIT
964 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
966 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
967 const u8 *inp, size_t len) = ctx->ghash;
975 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
977 ctx->len.u[0] = alen;
982 ctx->Xi.c[n] ^= *(aad++);
994 if ((i = (len & (size_t)-16))) {
1001 for (i = 0; i < 16; ++i)
1002 ctx->Xi.c[i] ^= aad[i];
1009 n = (unsigned int)len;
1010 for (i = 0; i < len; ++i)
1011 ctx->Xi.c[i] ^= aad[i];
1018 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
1019 const unsigned char *in, unsigned char *out,
1025 } is_endian = { 1 };
1026 unsigned int n, ctr;
1028 u64 mlen = ctx->len.u[1];
1029 block128_f block = ctx->block;
1030 void *key = ctx->key;
1031 #ifdef GCM_FUNCREF_4BIT
1032 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1033 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1034 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1035 const u8 *inp, size_t len) = ctx->ghash;
1040 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1042 ctx->len.u[1] = mlen;
1045 /* First call to encrypt finalizes GHASH(AAD) */
1050 if (is_endian.little)
1052 ctr = BSWAP4(ctx->Yi.d[3]);
1054 ctr = GETU32(ctx->Yi.c + 12);
1060 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1061 if (16 % sizeof(size_t) == 0) { /* always true actually */
1065 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1076 # if defined(STRICT_ALIGNMENT)
1077 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1081 # if defined(GHASH_CHUNK)
1082 while (len >= GHASH_CHUNK) {
1083 size_t j = GHASH_CHUNK;
1086 size_t *out_t = (size_t *)out;
1087 const size_t *in_t = (const size_t *)in;
1089 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1091 if (is_endian.little)
1093 ctx->Yi.d[3] = BSWAP4(ctr);
1095 PUTU32(ctx->Yi.c + 12, ctr);
1099 for (i = 0; i < 16 / sizeof(size_t); ++i)
1100 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1105 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
1109 if ((i = (len & (size_t)-16))) {
1113 size_t *out_t = (size_t *)out;
1114 const size_t *in_t = (const size_t *)in;
1116 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1118 if (is_endian.little)
1120 ctx->Yi.d[3] = BSWAP4(ctr);
1122 PUTU32(ctx->Yi.c + 12, ctr);
1126 for (i = 0; i < 16 / sizeof(size_t); ++i)
1127 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1132 GHASH(ctx, out - j, j);
1136 size_t *out_t = (size_t *)out;
1137 const size_t *in_t = (const size_t *)in;
1139 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1141 if (is_endian.little)
1143 ctx->Yi.d[3] = BSWAP4(ctr);
1145 PUTU32(ctx->Yi.c + 12, ctr);
1149 for (i = 0; i < 16 / sizeof(size_t); ++i)
1150 ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1158 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1160 if (is_endian.little)
1162 ctx->Yi.d[3] = BSWAP4(ctr);
1164 PUTU32(ctx->Yi.c + 12, ctr);
1169 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1179 for (i = 0; i < len; ++i) {
1181 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1183 if (is_endian.little)
1185 ctx->Yi.d[3] = BSWAP4(ctr);
1187 PUTU32(ctx->Yi.c + 12, ctr);
1192 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
1202 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1203 const unsigned char *in, unsigned char *out,
1209 } is_endian = { 1 };
1210 unsigned int n, ctr;
1212 u64 mlen = ctx->len.u[1];
1213 block128_f block = ctx->block;
1214 void *key = ctx->key;
1215 #ifdef GCM_FUNCREF_4BIT
1216 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1217 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1218 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1219 const u8 *inp, size_t len) = ctx->ghash;
1224 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1226 ctx->len.u[1] = mlen;
1229 /* First call to decrypt finalizes GHASH(AAD) */
1234 if (is_endian.little)
1236 ctr = BSWAP4(ctx->Yi.d[3]);
1238 ctr = GETU32(ctx->Yi.c + 12);
1244 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1245 if (16 % sizeof(size_t) == 0) { /* always true actually */
1250 *(out++) = c ^ ctx->EKi.c[n];
1262 # if defined(STRICT_ALIGNMENT)
1263 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1267 # if defined(GHASH_CHUNK)
1268 while (len >= GHASH_CHUNK) {
1269 size_t j = GHASH_CHUNK;
1271 GHASH(ctx, in, GHASH_CHUNK);
1273 size_t *out_t = (size_t *)out;
1274 const size_t *in_t = (const size_t *)in;
1276 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1278 if (is_endian.little)
1280 ctx->Yi.d[3] = BSWAP4(ctr);
1282 PUTU32(ctx->Yi.c + 12, ctr);
1286 for (i = 0; i < 16 / sizeof(size_t); ++i)
1287 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1295 if ((i = (len & (size_t)-16))) {
1298 size_t *out_t = (size_t *)out;
1299 const size_t *in_t = (const size_t *)in;
1301 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1303 if (is_endian.little)
1305 ctx->Yi.d[3] = BSWAP4(ctr);
1307 PUTU32(ctx->Yi.c + 12, ctr);
1311 for (i = 0; i < 16 / sizeof(size_t); ++i)
1312 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1320 size_t *out_t = (size_t *)out;
1321 const size_t *in_t = (const size_t *)in;
1323 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1325 if (is_endian.little)
1327 ctx->Yi.d[3] = BSWAP4(ctr);
1329 PUTU32(ctx->Yi.c + 12, ctr);
1333 for (i = 0; i < 16 / sizeof(size_t); ++i) {
1335 out[i] = c ^ ctx->EKi.t[i];
1345 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1347 if (is_endian.little)
1349 ctx->Yi.d[3] = BSWAP4(ctr);
1351 PUTU32(ctx->Yi.c + 12, ctr);
1358 out[n] = c ^ ctx->EKi.c[n];
1368 for (i = 0; i < len; ++i) {
1371 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1373 if (is_endian.little)
1375 ctx->Yi.d[3] = BSWAP4(ctr);
1377 PUTU32(ctx->Yi.c + 12, ctr);
1383 out[i] = c ^ ctx->EKi.c[n];
/*
 * Counter-mode variant of GCM encryption: |stream| is a ctr128_f that
 * encrypts many 16-byte blocks per call with an incrementing 32-bit
 * big-endian counter, avoiding one block-cipher call per block.
 * Returns 0 on success, non-zero when the GCM message-length limit is
 * exceeded.  NOTE(review): this listing is a sampled extraction --
 * lines between the numbered statements are elided, so braces and
 * else-branches are not fully visible here.
 */
1394 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1395 const unsigned char *in, unsigned char *out,
1396 size_t len, ctr128_f stream)
/* Small-footprint builds fall back to the one-block-at-a-time path. */
1398 #if defined(OPENSSL_SMALL_FOOTPRINT)
1399 return CRYPTO_gcm128_encrypt(ctx, in, out, len);
/* Runtime endianness probe (union initialized with 1). */
1404 } is_endian = { 1 };
1405 unsigned int n, ctr;
1407 u64 mlen = ctx->len.u[1];
1408 void *key = ctx->key;
1409 # ifdef GCM_FUNCREF_4BIT
/* Indirect GHASH entry points when function references are used. */
1410 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1412 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1413 const u8 *inp, size_t len) = ctx->ghash;
/*
 * GCM caps plaintext at 2^39-256 bits = 2^36-32 bytes; also reject
 * wrap-around of the 64-bit running length counter.
 */
1418 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1420 ctx->len.u[1] = mlen;
1423 /* First call to encrypt finalizes GHASH(AAD) */
/* Load the 32-bit invocation counter from the last word of Yi. */
1428 if (is_endian.little)
1430 ctr = BSWAP4(ctx->Yi.d[3]);
1432 ctr = GETU32(ctx->Yi.c + 12);
/* Drain a partial keystream block left over from a previous call. */
1440 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
/* Bulk path: encrypt GHASH_CHUNK bytes per stream call, then hash. */
1451 # if defined(GHASH) && defined(GHASH_CHUNK)
1452 while (len >= GHASH_CHUNK) {
1453 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1454 ctr += GHASH_CHUNK / 16;
/* Write the advanced counter back into Yi in big-endian form. */
1455 if (is_endian.little)
1457 ctx->Yi.d[3] = BSWAP4(ctr);
1459 PUTU32(ctx->Yi.c + 12, ctr);
/* Encrypt-then-hash: GHASH is computed over the ciphertext. */
1463 GHASH(ctx, out, GHASH_CHUNK);
/* Remaining whole 16-byte blocks (len rounded down to 16). */
1469 if ((i = (len & (size_t)-16))) {
1472 (*stream) (in, out, j, key, ctx->Yi.c);
1473 ctr += (unsigned int)j;
1474 if (is_endian.little)
1476 ctx->Yi.d[3] = BSWAP4(ctr);
1478 PUTU32(ctx->Yi.c + 12, ctr);
/* Per-block GHASH fallback: fold each ciphertext block into Xi. */
1489 for (i = 0; i < 16; ++i)
1490 ctx->Xi.c[i] ^= out[i];
/* Trailing partial block: make one keystream block with the cipher... */
1497 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1499 if (is_endian.little)
1501 ctx->Yi.d[3] = BSWAP4(ctr);
1503 PUTU32(ctx->Yi.c + 12, ctr);
/* ...and XOR/absorb the leftover bytes one at a time. */
1508 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
/*
 * Counter-mode variant of GCM decryption using the bulk ctr128_f
 * |stream|.  Because GHASH runs over the ciphertext, hashing happens
 * BEFORE decryption here -- the mirror image of the encrypt path.
 * Returns 0 on success, non-zero when the GCM length limit is hit.
 * NOTE(review): sampled extraction; intermediate lines are elided.
 */
1518 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1519 const unsigned char *in, unsigned char *out,
1520 size_t len, ctr128_f stream)
/* Small-footprint builds fall back to the one-block-at-a-time path. */
1522 #if defined(OPENSSL_SMALL_FOOTPRINT)
1523 return CRYPTO_gcm128_decrypt(ctx, in, out, len);
/* Runtime endianness probe (union initialized with 1). */
1528 } is_endian = { 1 };
1529 unsigned int n, ctr;
1531 u64 mlen = ctx->len.u[1];
1532 void *key = ctx->key;
1533 # ifdef GCM_FUNCREF_4BIT
/* Indirect GHASH entry points when function references are used. */
1534 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1536 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1537 const u8 *inp, size_t len) = ctx->ghash;
/* Enforce the 2^36-32 byte GCM limit and 64-bit counter wrap check. */
1542 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1544 ctx->len.u[1] = mlen;
1547 /* First call to decrypt finalizes GHASH(AAD) */
/* Load the 32-bit invocation counter from the last word of Yi. */
1552 if (is_endian.little)
1554 ctr = BSWAP4(ctx->Yi.d[3]);
1556 ctr = GETU32(ctx->Yi.c + 12);
/* Drain a partial keystream block left over from a previous call. */
1565 *(out++) = c ^ ctx->EKi.c[n];
/* Bulk path: hash a whole chunk of ciphertext, then decrypt it. */
1577 # if defined(GHASH) && defined(GHASH_CHUNK)
1578 while (len >= GHASH_CHUNK) {
1579 GHASH(ctx, in, GHASH_CHUNK);
1580 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1581 ctr += GHASH_CHUNK / 16;
/* Write the advanced counter back into Yi in big-endian form. */
1582 if (is_endian.little)
1584 ctx->Yi.d[3] = BSWAP4(ctr);
1586 PUTU32(ctx->Yi.c + 12, ctr);
/* Remaining whole 16-byte blocks (len rounded down to 16). */
1595 if ((i = (len & (size_t)-16))) {
/* Per-block GHASH fallback: absorb ciphertext block before decrypt. */
1603 for (k = 0; k < 16; ++k)
1604 ctx->Xi.c[k] ^= in[k];
1611 (*stream) (in, out, j, key, ctx->Yi.c);
1612 ctr += (unsigned int)j;
1613 if (is_endian.little)
1615 ctx->Yi.d[3] = BSWAP4(ctr);
1617 PUTU32(ctx->Yi.c + 12, ctr);
/* Trailing partial block: one keystream block via the block cipher. */
1626 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1628 if (is_endian.little)
1630 ctx->Yi.d[3] = BSWAP4(ctr);
1632 PUTU32(ctx->Yi.c + 12, ctr);
/* XOR leftover ciphertext bytes with keystream to recover plaintext. */
1639 out[n] = c ^ ctx->EKi.c[n];
/*
 * Finalize GCM: flush any buffered partial block through GHASH, absorb
 * the AAD/ciphertext bit lengths, and XOR with E(K, Y0) to form the
 * authentication tag in ctx->Xi.  When |tag| is non-NULL and |len| fits
 * in the tag buffer, the computed tag is compared against it and the
 * comparison result is returned (0 == match).
 * NOTE(review): sampled extraction -- intermediate lines are elided.
 */
1649 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
/* Runtime endianness probe (union initialized with 1). */
1655 } is_endian = { 1 };
/* len.u[] holds byte counts; << 3 converts to the bit lengths GHASH needs. */
1656 u64 alen = ctx->len.u[0] << 3;
1657 u64 clen = ctx->len.u[1] << 3;
1658 #ifdef GCM_FUNCREF_4BIT
1659 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
/* Outstanding partial AAD/data block? Pad it into GHASH first. */
1662 if (ctx->mres || ctx->ares)
/* GHASH consumes big-endian words; byte-swap on little-endian hosts. */
1665 if (is_endian.little) {
1667 alen = BSWAP8(alen);
1668 clen = BSWAP8(clen);
1672 ctx->len.u[0] = alen;
1673 ctx->len.u[1] = clen;
/* No-BSWAP8 fallback: assemble the big-endian u64s from 32-bit loads. */
1675 alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1676 clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
/* Final GHASH block is len(A) || len(C). */
1680 ctx->Xi.u[0] ^= alen;
1681 ctx->Xi.u[1] ^= clen;
/* Tag = GHASH output XOR E(K, Y0). */
1684 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1685 ctx->Xi.u[1] ^= ctx->EK0.u[1];
/*
 * NOTE(review): plain memcmp() is not constant-time; later OpenSSL
 * uses CRYPTO_memcmp here to avoid a timing side channel on tag
 * verification.  Flagged, not changed, since callers may compare the
 * return value against memcmp semantics.
 */
1687 if (tag && len <= sizeof(ctx->Xi))
1688 return memcmp(ctx->Xi.c, tag, len);
/*
 * Compute the final tag (CRYPTO_gcm128_finish with no comparison) and
 * copy at most sizeof(ctx->Xi.c) == 16 bytes of it into |tag|,
 * truncating when |len| is larger than the tag buffer.
 */
1693 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1695 CRYPTO_gcm128_finish(ctx, NULL, 0);
1696 memcpy(tag, ctx->Xi.c,
1697 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
/*
 * Heap-allocate a GCM context and initialize it for |key|/|block|.
 * The allocation result is checked before init, so a failed
 * OPENSSL_malloc yields NULL (the return statement itself falls on an
 * elided line of this extraction).
 */
1700 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1702 GCM128_CONTEXT *ret;
1704 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1705 CRYPTO_gcm128_init(ret, key, block);
/*
 * Scrub key material from the context before releasing it: a plain
 * memset could be optimized away, OPENSSL_cleanse cannot.  The matching
 * OPENSSL_free call is presumably on an elided following line -- TODO
 * confirm against the full source.
 */
1710 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1713 OPENSSL_cleanse(ctx, sizeof(*ctx));
1718 #if defined(SELFTEST)
1720 # include <openssl/aes.h>
/*
 * Self-test vectors (they match the test cases in the McGrew & Viega
 * GCM specification -- verify against the spec appendix).
 * Test Case 1: AES-128, all-zero key/IV, no plaintext, no AAD; only a
 * tag is produced.  NULL P/A/C pointers mark the absent inputs.
 */
1723 static const u8 K1[16], *P1 = NULL, *A1 = NULL, IV1[12], *C1 = NULL;
1724 static const u8 T1[] = {
1725 0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, 0x30, 0x61,
1726 0x36, 0x7f, 0x1d, 0x57, 0xa4, 0xe7, 0x45, 0x5a
/* Test Case 2: AES-128 zero key, one all-zero plaintext block. */
1733 static const u8 P2[16];
1734 static const u8 C2[] = {
1735 0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92,
1736 0xf3, 0x28, 0xc2, 0xb9, 0x71, 0xb2, 0xfe, 0x78
1739 static const u8 T2[] = {
1740 0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec, 0x13, 0xbd,
1741 0xf5, 0x3a, 0x67, 0xb2, 0x12, 0x57, 0xbd, 0xdf
/* Test Case 3: AES-128, 64-byte plaintext, 96-bit IV, no AAD. */
1746 static const u8 K3[] = {
1747 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1748 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
1751 static const u8 P3[] = {
1752 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1753 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1754 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1755 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1756 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1757 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1758 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1759 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1762 static const u8 IV3[] = {
1763 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1764 0xde, 0xca, 0xf8, 0x88
1767 static const u8 C3[] = {
1768 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1769 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1770 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1771 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1772 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1773 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1774 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1775 0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85
1778 static const u8 T3[] = {
1779 0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd, 0x64, 0xa6,
1780 0x2c, 0xf3, 0x5a, 0xbd, 0x2b, 0xa6, 0xfa, 0xb4
/*
 * Test Case 4: AES-128, 60-byte plaintext (truncated final block),
 * 20 bytes of AAD.  Key/IV are shared with test case 3 (elided here).
 */
1786 static const u8 P4[] = {
1787 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1788 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1789 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1790 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1791 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1792 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1793 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1794 0xba, 0x63, 0x7b, 0x39
1797 static const u8 A4[] = {
1798 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1799 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1800 0xab, 0xad, 0xda, 0xd2
1803 static const u8 C4[] = {
1804 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1805 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1806 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1807 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1808 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1809 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1810 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1811 0x3d, 0x58, 0xe0, 0x91
1814 static const u8 T4[] = {
1815 0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21, 0xa5, 0xdb,
1816 0x94, 0xfa, 0xe9, 0x5a, 0xe7, 0x12, 0x1a, 0x47
/* Test Case 5: as case 4 but with a short 64-bit IV. */
1823 static const u8 IV5[] = {
1824 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad
1827 static const u8 C5[] = {
1828 0x61, 0x35, 0x3b, 0x4c, 0x28, 0x06, 0x93, 0x4a,
1829 0x77, 0x7f, 0xf5, 0x1f, 0xa2, 0x2a, 0x47, 0x55,
1830 0x69, 0x9b, 0x2a, 0x71, 0x4f, 0xcd, 0xc6, 0xf8,
1831 0x37, 0x66, 0xe5, 0xf9, 0x7b, 0x6c, 0x74, 0x23,
1832 0x73, 0x80, 0x69, 0x00, 0xe4, 0x9f, 0x24, 0xb2,
1833 0x2b, 0x09, 0x75, 0x44, 0xd4, 0x89, 0x6b, 0x42,
1834 0x49, 0x89, 0xb5, 0xe1, 0xeb, 0xac, 0x0f, 0x07,
1835 0xc2, 0x3f, 0x45, 0x98
1838 static const u8 T5[] = {
1839 0x36, 0x12, 0xd2, 0xe7, 0x9e, 0x3b, 0x07, 0x85,
1840 0x56, 0x1b, 0xe1, 0x4a, 0xac, 0xa2, 0xfc, 0xcb
/* Test Case 6: as case 4 but with a long 60-byte IV (exercises the
 * GHASH-based IV-processing path for IVs != 96 bits). */
1847 static const u8 IV6[] = {
1848 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
1849 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
1850 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
1851 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
1852 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
1853 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
1854 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
1855 0xa6, 0x37, 0xb3, 0x9b
1858 static const u8 C6[] = {
1859 0x8c, 0xe2, 0x49, 0x98, 0x62, 0x56, 0x15, 0xb6,
1860 0x03, 0xa0, 0x33, 0xac, 0xa1, 0x3f, 0xb8, 0x94,
1861 0xbe, 0x91, 0x12, 0xa5, 0xc3, 0xa2, 0x11, 0xa8,
1862 0xba, 0x26, 0x2a, 0x3c, 0xca, 0x7e, 0x2c, 0xa7,
1863 0x01, 0xe4, 0xa9, 0xa4, 0xfb, 0xa4, 0x3c, 0x90,
1864 0xcc, 0xdc, 0xb2, 0x81, 0xd4, 0x8c, 0x7c, 0x6f,
1865 0xd6, 0x28, 0x75, 0xd2, 0xac, 0xa4, 0x17, 0x03,
1866 0x4c, 0x34, 0xae, 0xe5
1869 static const u8 T6[] = {
1870 0x61, 0x9c, 0xc5, 0xae, 0xff, 0xfe, 0x0b, 0xfa,
1871 0x46, 0x2a, 0xf4, 0x3c, 0x16, 0x99, 0xd0, 0x50
/* Test Case 7: AES-192, all-zero key/IV, no plaintext, no AAD. */
1875 static const u8 K7[24], *P7 = NULL, *A7 = NULL, IV7[12], *C7 = NULL;
1876 static const u8 T7[] = {
1877 0xcd, 0x33, 0xb2, 0x8a, 0xc7, 0x73, 0xf7, 0x4b,
1878 0xa0, 0x0e, 0xd1, 0xf3, 0x12, 0x57, 0x24, 0x35
/* Test Case 8: AES-192 zero key, one all-zero plaintext block. */
1885 static const u8 P8[16];
1886 static const u8 C8[] = {
1887 0x98, 0xe7, 0x24, 0x7c, 0x07, 0xf0, 0xfe, 0x41,
1888 0x1c, 0x26, 0x7e, 0x43, 0x84, 0xb0, 0xf6, 0x00
1891 static const u8 T8[] = {
1892 0x2f, 0xf5, 0x8d, 0x80, 0x03, 0x39, 0x27, 0xab,
1893 0x8e, 0xf4, 0xd4, 0x58, 0x75, 0x14, 0xf0, 0xfb
/* Test Case 9: AES-192, 64-byte plaintext, 96-bit IV, no AAD. */
1898 static const u8 K9[] = {
1899 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1900 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
1901 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c
1904 static const u8 P9[] = {
1905 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1906 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1907 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1908 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1909 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1910 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1911 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1912 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1915 static const u8 IV9[] = {
1916 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1917 0xde, 0xca, 0xf8, 0x88
1920 static const u8 C9[] = {
1921 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1922 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1923 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1924 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1925 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1926 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1927 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1928 0xcc, 0xda, 0x27, 0x10, 0xac, 0xad, 0xe2, 0x56
1931 static const u8 T9[] = {
1932 0x99, 0x24, 0xa7, 0xc8, 0x58, 0x73, 0x36, 0xbf,
1933 0xb1, 0x18, 0x02, 0x4d, 0xb8, 0x67, 0x4a, 0x14
/* Test Case 10: AES-192, 60-byte plaintext plus 20 bytes of AAD. */
1939 static const u8 P10[] = {
1940 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1941 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1942 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1943 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1944 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1945 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1946 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1947 0xba, 0x63, 0x7b, 0x39
1950 static const u8 A10[] = {
1951 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1952 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1953 0xab, 0xad, 0xda, 0xd2
1956 static const u8 C10[] = {
1957 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1958 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1959 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1960 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1961 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1962 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1963 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1964 0xcc, 0xda, 0x27, 0x10
1967 static const u8 T10[] = {
1968 0x25, 0x19, 0x49, 0x8e, 0x80, 0xf1, 0x47, 0x8f,
1969 0x37, 0xba, 0x55, 0xbd, 0x6d, 0x27, 0x61, 0x8c
/* Test Case 11: AES-192, 60-byte plaintext, AAD, short 64-bit IV. */
1976 static const u8 IV11[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
1978 static const u8 C11[] = {
1979 0x0f, 0x10, 0xf5, 0x99, 0xae, 0x14, 0xa1, 0x54,
1980 0xed, 0x24, 0xb3, 0x6e, 0x25, 0x32, 0x4d, 0xb8,
1981 0xc5, 0x66, 0x63, 0x2e, 0xf2, 0xbb, 0xb3, 0x4f,
1982 0x83, 0x47, 0x28, 0x0f, 0xc4, 0x50, 0x70, 0x57,
1983 0xfd, 0xdc, 0x29, 0xdf, 0x9a, 0x47, 0x1f, 0x75,
1984 0xc6, 0x65, 0x41, 0xd4, 0xd4, 0xda, 0xd1, 0xc9,
1985 0xe9, 0x3a, 0x19, 0xa5, 0x8e, 0x8b, 0x47, 0x3f,
1986 0xa0, 0xf0, 0x62, 0xf7
1989 static const u8 T11[] = {
1990 0x65, 0xdc, 0xc5, 0x7f, 0xcf, 0x62, 0x3a, 0x24,
1991 0x09, 0x4f, 0xcc, 0xa4, 0x0d, 0x35, 0x33, 0xf8
/* Test Case 12: AES-192 with a long 60-byte IV. */
1998 static const u8 IV12[] = {
1999 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2000 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2001 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2002 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2003 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2004 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2005 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2006 0xa6, 0x37, 0xb3, 0x9b
2009 static const u8 C12[] = {
2010 0xd2, 0x7e, 0x88, 0x68, 0x1c, 0xe3, 0x24, 0x3c,
2011 0x48, 0x30, 0x16, 0x5a, 0x8f, 0xdc, 0xf9, 0xff,
2012 0x1d, 0xe9, 0xa1, 0xd8, 0xe6, 0xb4, 0x47, 0xef,
2013 0x6e, 0xf7, 0xb7, 0x98, 0x28, 0x66, 0x6e, 0x45,
2014 0x81, 0xe7, 0x90, 0x12, 0xaf, 0x34, 0xdd, 0xd9,
2015 0xe2, 0xf0, 0x37, 0x58, 0x9b, 0x29, 0x2d, 0xb3,
2016 0xe6, 0x7c, 0x03, 0x67, 0x45, 0xfa, 0x22, 0xe7,
2017 0xe9, 0xb7, 0x37, 0x3b
2020 static const u8 T12[] = {
2021 0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb,
2022 0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9
/* Test Case 13: AES-256, all-zero key/IV, no plaintext, no AAD. */
2026 static const u8 K13[32], *P13 = NULL, *A13 = NULL, IV13[12], *C13 = NULL;
2027 static const u8 T13[] = {
2028 0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9,
2029 0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b
/* Test Case 14: AES-256 zero key, one all-zero plaintext block. */
2035 static const u8 P14[16], IV14[12];
2036 static const u8 C14[] = {
2037 0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, 0x6b, 0x6e,
2038 0x07, 0x4e, 0xc5, 0xd3, 0xba, 0xf3, 0x9d, 0x18
2041 static const u8 T14[] = {
2042 0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99, 0x6b, 0xf0,
2043 0x26, 0x5b, 0x98, 0xb5, 0xd4, 0x8a, 0xb9, 0x19
/* Test Case 15: AES-256, 64-byte plaintext, 96-bit IV, no AAD. */
2048 static const u8 K15[] = {
2049 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2050 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
2051 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2052 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
2055 static const u8 P15[] = {
2056 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2057 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2058 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2059 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2060 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2061 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2062 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2063 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
2066 static const u8 IV15[] = {
2067 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
2068 0xde, 0xca, 0xf8, 0x88
2071 static const u8 C15[] = {
2072 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2073 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2074 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2075 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2076 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2077 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2078 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2079 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2082 static const u8 T15[] = {
2083 0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34, 0x71, 0xbd,
2084 0xec, 0x1a, 0x50, 0x22, 0x70, 0xe3, 0xcc, 0x6c
/* Test Case 16: AES-256, 60-byte plaintext plus 20 bytes of AAD. */
2090 static const u8 P16[] = {
2091 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2092 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2093 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2094 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2095 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2096 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2097 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2098 0xba, 0x63, 0x7b, 0x39
2101 static const u8 A16[] = {
2102 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2103 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2104 0xab, 0xad, 0xda, 0xd2
2107 static const u8 C16[] = {
2108 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2109 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2110 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2111 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2112 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2113 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2114 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2115 0xbc, 0xc9, 0xf6, 0x62
2118 static const u8 T16[] = {
2119 0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68,
2120 0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
/* Test Case 17: AES-256, 60-byte plaintext, AAD, short 64-bit IV. */
2127 static const u8 IV17[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
2129 static const u8 C17[] = {
2130 0xc3, 0x76, 0x2d, 0xf1, 0xca, 0x78, 0x7d, 0x32,
2131 0xae, 0x47, 0xc1, 0x3b, 0xf1, 0x98, 0x44, 0xcb,
2132 0xaf, 0x1a, 0xe1, 0x4d, 0x0b, 0x97, 0x6a, 0xfa,
2133 0xc5, 0x2f, 0xf7, 0xd7, 0x9b, 0xba, 0x9d, 0xe0,
2134 0xfe, 0xb5, 0x82, 0xd3, 0x39, 0x34, 0xa4, 0xf0,
2135 0x95, 0x4c, 0xc2, 0x36, 0x3b, 0xc7, 0x3f, 0x78,
2136 0x62, 0xac, 0x43, 0x0e, 0x64, 0xab, 0xe4, 0x99,
2137 0xf4, 0x7c, 0x9b, 0x1f
2140 static const u8 T17[] = {
2141 0x3a, 0x33, 0x7d, 0xbf, 0x46, 0xa7, 0x92, 0xc4,
2142 0x5e, 0x45, 0x49, 0x13, 0xfe, 0x2e, 0xa8, 0xf2
/* Test Case 18: AES-256 with a long 60-byte IV. */
2149 static const u8 IV18[] = {
2150 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2151 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2152 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2153 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2154 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2155 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2156 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2157 0xa6, 0x37, 0xb3, 0x9b
2160 static const u8 C18[] = {
2161 0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1,
2162 0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20,
2163 0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19,
2164 0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4,
2165 0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45,
2166 0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde,
2167 0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e,
2168 0x44, 0xae, 0x7e, 0x3f
2171 static const u8 T18[] = {
2172 0xa4, 0x4a, 0x82, 0x66, 0xee, 0x1c, 0x8e, 0xb0,
2173 0xc8, 0xb5, 0xd4, 0xcf, 0x5a, 0xe9, 0xf1, 0x9a
/* Test Case 19: AAD-only (128 bytes of AAD, no plaintext) -- the AAD
 * here is the P15||C15 data reused as associated data. */
2181 static const u8 A19[] = {
2182 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2183 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2184 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2185 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2186 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2187 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2188 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2189 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55,
2190 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2191 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2192 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2193 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2194 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2195 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2196 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2197 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2200 static const u8 T19[] = {
2201 0x5f, 0xea, 0x79, 0x3a, 0x2d, 0x6f, 0x97, 0x4d,
2202 0x37, 0xe6, 0x8e, 0x0c, 0xb8, 0xff, 0x94, 0x92
/* Test Case 20: counter-LSB edge case -- a 64-byte IV whose hash puts
 * 0xff in the low counter byte, plus 288 zero bytes of plaintext, to
 * exercise 32-bit counter carry/wrap handling. */
2208 /* this results in 0xff in counter LSB */
2209 static const u8 IV20[64] = { 0xff, 0xff, 0xff, 0xff };
2211 static const u8 P20[288];
2212 static const u8 C20[] = {
2213 0x56, 0xb3, 0x37, 0x3c, 0xa9, 0xef, 0x6e, 0x4a,
2214 0x2b, 0x64, 0xfe, 0x1e, 0x9a, 0x17, 0xb6, 0x14,
2215 0x25, 0xf1, 0x0d, 0x47, 0xa7, 0x5a, 0x5f, 0xce,
2216 0x13, 0xef, 0xc6, 0xbc, 0x78, 0x4a, 0xf2, 0x4f,
2217 0x41, 0x41, 0xbd, 0xd4, 0x8c, 0xf7, 0xc7, 0x70,
2218 0x88, 0x7a, 0xfd, 0x57, 0x3c, 0xca, 0x54, 0x18,
2219 0xa9, 0xae, 0xff, 0xcd, 0x7c, 0x5c, 0xed, 0xdf,
2220 0xc6, 0xa7, 0x83, 0x97, 0xb9, 0xa8, 0x5b, 0x49,
2221 0x9d, 0xa5, 0x58, 0x25, 0x72, 0x67, 0xca, 0xab,
2222 0x2a, 0xd0, 0xb2, 0x3c, 0xa4, 0x76, 0xa5, 0x3c,
2223 0xb1, 0x7f, 0xb4, 0x1c, 0x4b, 0x8b, 0x47, 0x5c,
2224 0xb4, 0xf3, 0xf7, 0x16, 0x50, 0x94, 0xc2, 0x29,
2225 0xc9, 0xe8, 0xc4, 0xdc, 0x0a, 0x2a, 0x5f, 0xf1,
2226 0x90, 0x3e, 0x50, 0x15, 0x11, 0x22, 0x13, 0x76,
2227 0xa1, 0xcd, 0xb8, 0x36, 0x4c, 0x50, 0x61, 0xa2,
2228 0x0c, 0xae, 0x74, 0xbc, 0x4a, 0xcd, 0x76, 0xce,
2229 0xb0, 0xab, 0xc9, 0xfd, 0x32, 0x17, 0xef, 0x9f,
2230 0x8c, 0x90, 0xbe, 0x40, 0x2d, 0xdf, 0x6d, 0x86,
2231 0x97, 0xf4, 0xf8, 0x80, 0xdf, 0xf1, 0x5b, 0xfb,
2232 0x7a, 0x6b, 0x28, 0x24, 0x1e, 0xc8, 0xfe, 0x18,
2233 0x3c, 0x2d, 0x59, 0xe3, 0xf9, 0xdf, 0xff, 0x65,
2234 0x3c, 0x71, 0x26, 0xf0, 0xac, 0xb9, 0xe6, 0x42,
2235 0x11, 0xf4, 0x2b, 0xae, 0x12, 0xaf, 0x46, 0x2b,
2236 0x10, 0x70, 0xbe, 0xf1, 0xab, 0x5e, 0x36, 0x06,
2237 0x87, 0x2c, 0xa1, 0x0d, 0xee, 0x15, 0xb3, 0x24,
2238 0x9b, 0x1a, 0x1b, 0x95, 0x8f, 0x23, 0x13, 0x4c,
2239 0x4b, 0xcc, 0xb7, 0xd0, 0x32, 0x00, 0xbc, 0xe4,
2240 0x20, 0xa2, 0xf8, 0xeb, 0x66, 0xdc, 0xf3, 0x64,
2241 0x4d, 0x14, 0x23, 0xc1, 0xb5, 0x69, 0x90, 0x03,
2242 0xc1, 0x3e, 0xce, 0xf4, 0xbf, 0x38, 0xa3, 0xb6,
2243 0x0e, 0xed, 0xc3, 0x40, 0x33, 0xba, 0xc1, 0x90,
2244 0x27, 0x83, 0xdc, 0x6d, 0x89, 0xe2, 0xe7, 0x74,
2245 0x18, 0x8a, 0x43, 0x9c, 0x7e, 0xbc, 0xc0, 0x67,
2246 0x2d, 0xbd, 0xa4, 0xdd, 0xcf, 0xb2, 0x79, 0x46,
2247 0x13, 0xb0, 0xbe, 0x41, 0x31, 0x5e, 0xf7, 0x78,
2248 0x70, 0x8a, 0x70, 0xee, 0x7d, 0x75, 0x16, 0x5c
2251 static const u8 T20[] = {
2252 0x8b, 0x30, 0x7f, 0x6b, 0x33, 0x28, 0x6d, 0x0a,
2253 0xb0, 0x26, 0xa9, 0xed, 0x3f, 0xe1, 0xe8, 0x5f
/*
 * Run test vector #n in both directions: encrypt P##n and verify the
 * ciphertext C##n and tag T##n, then decrypt C##n and verify P##n and
 * the tag again.  NULL P/A/C pointers mark absent plaintext/AAD/
 * ciphertext.  Relies on the do { ... } while(0) idiom (the closing
 * lines are elided in this extraction); increments |ret| and prints a
 * diagnostic on each failure.  No new lines may be inserted inside the
 * macro body without a trailing backslash, so comments are kept here.
 */
2256 # define TEST_CASE(n) do { \
2257 u8 out[sizeof(P##n)]; \
2258 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
2259 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
2260 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
2261 memset(out,0,sizeof(out)); \
2262 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
2263 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
2264 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
2265 (C##n && memcmp(out,C##n,sizeof(out)))) \
2266 ret++, printf ("encrypt test#%d failed.\n",n); \
2267 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
2268 memset(out,0,sizeof(out)); \
2269 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
2270 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
2271 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
2272 (P##n && memcmp(out,P##n,sizeof(out)))) \
2273 ret++, printf ("decrypt test#%d failed.\n",n); \
2303 # ifdef OPENSSL_CPUID_OBJ
2305 size_t start, stop, gcm_t, ctr_t, OPENSSL_rdtsc();
2312 AES_set_encrypt_key(K1, sizeof(K1) * 8, &key);
2313 CRYPTO_gcm128_init(&ctx, &key, (block128_f) AES_encrypt);
2314 CRYPTO_gcm128_setiv(&ctx, IV1, sizeof(IV1));
2316 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2317 start = OPENSSL_rdtsc();
2318 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2319 gcm_t = OPENSSL_rdtsc() - start;
2321 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2322 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2323 (block128_f) AES_encrypt);
2324 start = OPENSSL_rdtsc();
2325 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2326 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2327 (block128_f) AES_encrypt);
2328 ctr_t = OPENSSL_rdtsc() - start;
2330 printf("%.2f-%.2f=%.2f\n",
2331 gcm_t / (double)sizeof(buf),
2332 ctr_t / (double)sizeof(buf),
2333 (gcm_t - ctr_t) / (double)sizeof(buf));
2336 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
2337 const u8 *inp, size_t len) = ctx.ghash;
2339 GHASH((&ctx), buf.c, sizeof(buf));
2340 start = OPENSSL_rdtsc();
2341 for (i = 0; i < 100; ++i)
2342 GHASH((&ctx), buf.c, sizeof(buf));
2343 gcm_t = OPENSSL_rdtsc() - start;
2344 printf("%.2f\n", gcm_t / (double)sizeof(buf) / (double)i);