1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
50 #include <openssl/crypto.h>
51 #include "modes_lcl.h"
/*
 * NOTE(review): this listing is elided — the original-source line numbers
 * embedded at the start of each line jump, so some lines (including #endif
 * closers) are missing from view.
 */
61 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
62 /* redefine, because alignment is ensured */
/* With alignment guaranteed, a 32-bit big-endian load/store can be done as
 * one word access plus a byte swap instead of four byte accesses. */
64 # define GETU32(p) BSWAP4(*(const u32 *)(p))
66 # define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
/* PACK() positions a 16-bit value in the topmost 16 bits of a size_t, so the
 * same remainder tables serve both 32-bit and 64-bit builds. */
69 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * REDUCE1BIT(V): shift the 128-bit value V (as .hi/.lo u64 halves) right by
 * one bit and, when a bit is carried out of the low end, fold it back in
 * with the GCM reduction constant 0xE1 in the top byte.  The compile-time
 * sizeof(size_t) test selects a 64-bit- or 32-bit-friendly formulation.
 * NOTE(review): the `} else {` line and the macro's closing `} while(0)` are
 * elided from this listing (embedded numbering jumps 74->77 and ends at 79).
 */
70 #define REDUCE1BIT(V) do { \
71 if (sizeof(size_t)==8) { \
72 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
73 V.lo = (V.hi<<63)|(V.lo>>1); \
74 V.hi = (V.hi>>1 )^T; \
77 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
78 V.lo = (V.hi<<63)|(V.lo>>1); \
79 V.hi = (V.hi>>1 )^((u64)T<<32); \
84 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
85 * never be set to 8. 8 is effectively reserved for testing purposes.
86 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
87 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
88 * whole spectrum of possible table driven implementations. Why? In
89 * non-"Shoup's" case memory access pattern is segmented in such manner,
90 * that it's trivial to see that cache timing information can reveal
91 * fair portion of intermediate hash value. Given that ciphertext is
92 * always available to attacker, it's possible for him to attempt to
93 * deduce secret parameter H and if successful, tamper with messages
94 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
95 * not as trivial, but there is no reason to believe that it's resistant
96 * to cache-timing attack. And the thing about "8-bit" implementation is
97 * that it consumes 16 (sixteen) times more memory, 4KB per individual
98 * key + 1KB shared. Well, on pros side it should be twice as fast as
99 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
100 * was observed to run ~75% faster, closer to 100% for commercial
101 * compilers... Yet "4-bit" procedure is preferred, because it's
102 * believed to provide better security-performance balance and adequate
103 * all-round performance. "All-round" refers to things like:
105 * - shorter setup time effectively improves overall timing for
106 * handling short messages;
107 * - larger table allocation can become unbearable because of VM
108 * subsystem penalties (for example on Windows large enough free
109 * results in VM working set trimming, meaning that consequent
110 * malloc would immediately incur working set expansion);
111 * - larger table has larger cache footprint, which can affect
112 * performance of other code paths (not necessarily even from same
113 * thread in Hyper-Threading world);
115 * Value of 1 is not appropriate for performance reasons.
/*
 * gcm_init_8bit: precompute the 256-entry multiplication table for the
 * "8-bit Shoup" GHASH variant.  Htable[128] is seeded with H, the first
 * loop (visible below) fills the power-of-two entries by repeated 1-bit
 * reduction, and the second loop fills the remaining entries as XOR
 * combinations of already-computed ones.
 * NOTE(review): local declarations and both loops' closing braces are
 * elided from this listing (embedded numbering jumps 119->129, 138->143).
 */
119 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
129 for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
134 for (i = 2; i < 256; i <<= 1) {
135 u128 *Hi = Htable + i, H0 = *Hi;
136 for (j = 1; j < i; ++j) {
/* Htable[i+j] = Htable[i] + Htable[j] in GF(2^128) (XOR of halves). */
137 Hi[j].hi = H0.hi ^ Htable[j].hi;
138 Hi[j].lo = H0.lo ^ Htable[j].lo;
/*
 * gcm_gmult_8bit: Xi = Xi * H in GF(2^128), processing one byte of Xi per
 * iteration via the 256-entry Htable and the rem_8bit remainder table.
 * NOTE(review): the main per-byte loop structure, several declarations and
 * the closing braces are elided from this listing (embedded numbering jumps
 * e.g. 146->154, 218->222, 250->261) — only fragments are visible.
 */
143 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
/* Walk Xi from its least significant (last) byte backwards. */
146 const u8 *xi = (const u8 *)Xi + 15;
/* Remainders of the 8-bit reduction step, pre-shifted into position by
 * PACK() so a single XOR applies them. */
154 static const size_t rem_8bit[256] = {
155 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
156 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
157 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
158 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
159 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
160 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
161 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
162 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
163 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
164 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
165 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
166 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
167 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
168 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
169 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
170 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
171 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
172 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
173 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
174 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
175 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
176 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
177 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
178 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
179 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
180 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
181 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
182 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
183 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
184 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
185 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
186 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
187 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
188 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
189 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
190 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
191 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
192 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
193 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
194 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
195 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
196 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
197 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
198 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
199 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
200 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
201 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
202 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
203 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
204 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
205 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
206 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
207 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
208 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
209 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
210 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
211 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
212 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
213 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
214 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
215 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
216 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
217 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
218 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
/* Accumulate the table entry selected by the current byte... */
222 Z.hi ^= Htable[n].hi;
223 Z.lo ^= Htable[n].lo;
/* ...then shift Z right by 8 bits and fold the shifted-out byte back in
 * via rem_8bit (positioned differently on 32- vs 64-bit size_t). */
230 rem = (size_t)Z.lo & 0xff;
231 Z.lo = (Z.hi << 56) | (Z.lo >> 8);
233 if (sizeof(size_t) == 8)
234 Z.hi ^= rem_8bit[rem];
236 Z.hi ^= (u64)rem_8bit[rem] << 32;
/* Store the result back into Xi in big-endian byte order. */
239 if (is_endian.little) {
241 Xi[0] = BSWAP8(Z.hi);
242 Xi[1] = BSWAP8(Z.lo);
246 v = (u32)(Z.hi >> 32);
250 v = (u32)(Z.lo >> 32);
261 # define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
/*
 * gcm_init_4bit: precompute the 16-entry multiplication table for the
 * "4-bit Shoup" GHASH variant.  Two fill strategies are visible, selected
 * by OPENSSL_SMALL_FOOTPRINT: a generic loop, or fully unrolled XOR
 * combinations.  A trailing ARM-specific pass reorders dwords for the
 * assembler implementations.
 * NOTE(review): declarations, the #else/#endif structure and several loop
 * bodies are elided from this listing (embedded numbering jumps e.g.
 * 268->277, 288->299, 311->313).
 */
265 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
268 # if defined(OPENSSL_SMALL_FOOTPRINT)
277 # if defined(OPENSSL_SMALL_FOOTPRINT)
/* Generic path: powers of two by 1-bit reduction, rest by XOR. */
278 for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
283 for (i = 2; i < 16; i <<= 1) {
284 u128 *Hi = Htable + i;
286 for (V = *Hi, j = 1; j < i; ++j) {
287 Hi[j].hi = V.hi ^ Htable[j].hi;
288 Hi[j].lo = V.lo ^ Htable[j].lo;
/* Unrolled path: Htable[i+j] = Htable[i] ^ Htable[j]. */
299 Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
301 Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
302 Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
303 Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
305 Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
306 Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
307 Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
308 Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
309 Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
310 Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
311 Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
313 # if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
315 * ARM assembler expects specific dword order in Htable.
326 if (is_endian.little)
327 for (j = 0; j < 16; ++j) {
332 for (j = 0; j < 16; ++j) {
/* Big-endian case: swap the 32-bit halves of each dword. */
334 Htable[j].hi = V.lo << 32 | V.lo >> 32;
335 Htable[j].lo = V.hi << 32 | V.hi >> 32;
/* Remainders of the 4-bit reduction step, pre-positioned by PACK() so the
 * gmult/ghash inner loops apply them with one XOR. */
342 static const size_t rem_4bit[16] = {
343 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
344 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
345 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
346 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
/*
 * gcm_gmult_4bit: Xi = Xi * H in GF(2^128), consuming one nibble of Xi per
 * step (low nibble then high nibble of each byte) via the 16-entry Htable,
 * folding shifted-out bits back with rem_4bit.
 * NOTE(review): the loop framing (cnt handling, nhi extraction, closing
 * braces) is elided from this listing (embedded numbering jumps e.g.
 * 353->361, 378->383) — only the core statements are visible.
 */
349 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
353 size_t rem, nlo, nhi;
361 nlo = ((const u8 *)Xi)[15];
365 Z.hi = Htable[nlo].hi;
366 Z.lo = Htable[nlo].lo;
/* Shift Z right 4 bits; fold the dropped nibble back via rem_4bit,
 * whose position depends on 32- vs 64-bit size_t. */
369 rem = (size_t)Z.lo & 0xf;
370 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
372 if (sizeof(size_t) == 8)
373 Z.hi ^= rem_4bit[rem];
375 Z.hi ^= (u64)rem_4bit[rem] << 32;
377 Z.hi ^= Htable[nhi].hi;
378 Z.lo ^= Htable[nhi].lo;
383 nlo = ((const u8 *)Xi)[cnt];
387 rem = (size_t)Z.lo & 0xf;
388 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
390 if (sizeof(size_t) == 8)
391 Z.hi ^= rem_4bit[rem];
393 Z.hi ^= (u64)rem_4bit[rem] << 32;
395 Z.hi ^= Htable[nlo].hi;
396 Z.lo ^= Htable[nlo].lo;
/* Write the product back to Xi in big-endian byte order. */
399 if (is_endian.little) {
401 Xi[0] = BSWAP8(Z.hi);
402 Xi[1] = BSWAP8(Z.lo);
406 v = (u32)(Z.hi >> 32);
410 v = (u32)(Z.lo >> 32);
421 # if !defined(OPENSSL_SMALL_FOOTPRINT)
423 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
424 * details... Compiler-generated code doesn't seem to give any
425 * performance improvement, at least not on x86[_64]. It's here
426 * mostly as reference and a placeholder for possible future
427 * non-trivial optimization[s]...
/*
 * gcm_ghash_4bit: streamed GHASH — absorbs `len` bytes of `inp` (16 at a
 * time) into Xi, multiplying by H after each block.  Two variants exist:
 * a straightforward 4-bit one, and an enhanced one using the Hshr4/Hshl4
 * helper tables plus a short (unpacked) rem_8bit table.
 * NOTE(review): this listing is elided — loop framing, #if selection
 * between the two variants, and closing braces are missing (embedded
 * numbering jumps e.g. 434->445, 482->486, 599->603).
 */
429 static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
430 const u8 *inp, size_t len)
434 size_t rem, nlo, nhi;
445 nlo = ((const u8 *)Xi)[15];
450 Z.hi = Htable[nlo].hi;
451 Z.lo = Htable[nlo].lo;
/* 4-bit shift-and-reduce, identical in shape to gcm_gmult_4bit. */
454 rem = (size_t)Z.lo & 0xf;
455 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
457 if (sizeof(size_t) == 8)
458 Z.hi ^= rem_4bit[rem];
460 Z.hi ^= (u64)rem_4bit[rem] << 32;
462 Z.hi ^= Htable[nhi].hi;
463 Z.lo ^= Htable[nhi].lo;
468 nlo = ((const u8 *)Xi)[cnt];
473 rem = (size_t)Z.lo & 0xf;
474 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
476 if (sizeof(size_t) == 8)
477 Z.hi ^= rem_4bit[rem];
479 Z.hi ^= (u64)rem_4bit[rem] << 32;
481 Z.hi ^= Htable[nlo].hi;
482 Z.lo ^= Htable[nlo].lo;
486 * Extra 256+16 bytes per-key plus 512 bytes shared tables
487 * [should] give ~50% improvement... One could have PACK()-ed
488 * the rem_8bit even here, but the priority is to minimize
491 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
492 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
493 static const unsigned short rem_8bit[256] = {
494 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
495 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
496 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
497 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
498 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
499 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
500 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
501 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
502 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
503 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
504 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
505 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
506 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
507 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
508 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
509 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
510 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
511 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
512 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
513 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
514 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
515 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
516 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
517 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
518 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
519 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
520 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
521 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
522 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
523 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
524 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
525 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
528 * This pre-processing phase slows down procedure by approximately
529 * same time as it makes each loop spin faster. In other words
530 * single block performance is approximately same as straightforward
531 * "4-bit" implementation, and then it goes only faster...
/* Build the shifted helper tables once per call. */
533 for (cnt = 0; cnt < 16; ++cnt) {
534 Z.hi = Htable[cnt].hi;
535 Z.lo = Htable[cnt].lo;
536 Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
537 Hshr4[cnt].hi = (Z.hi >> 4);
538 Hshl4[cnt] = (u8)(Z.lo << 4);
/* Enhanced main loop: byte-at-a-time with 8-bit remainder folding. */
542 for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
543 nlo = ((const u8 *)Xi)[cnt];
548 Z.hi ^= Htable[nlo].hi;
549 Z.lo ^= Htable[nlo].lo;
551 rem = (size_t)Z.lo & 0xff;
553 Z.lo = (Z.hi << 56) | (Z.lo >> 8);
556 Z.hi ^= Hshr4[nhi].hi;
557 Z.lo ^= Hshr4[nhi].lo;
558 Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
/* Final (first) byte gets a 4-bit tail step. */
561 nlo = ((const u8 *)Xi)[0];
566 Z.hi ^= Htable[nlo].hi;
567 Z.lo ^= Htable[nlo].lo;
569 rem = (size_t)Z.lo & 0xf;
571 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
574 Z.hi ^= Htable[nhi].hi;
575 Z.lo ^= Htable[nhi].lo;
576 Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
/* Store Xi back big-endian, then advance to the next 16-byte block. */
579 if (is_endian.little) {
581 Xi[0] = BSWAP8(Z.hi);
582 Xi[1] = BSWAP8(Z.lo);
586 v = (u32)(Z.hi >> 32);
590 v = (u32)(Z.lo >> 32);
599 } while (inp += 16, len -= 16);
/* Assembler implementations provide these when GHASH_ASM is set. */
603 void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
604 void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
608 # define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
609 # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
610 # define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
612 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing
613 * effect. In other words idea is to hash data while it's still in L1 cache
614 * after encryption pass...
616 # define GHASH_CHUNK (3*1024)
619 #else /* TABLE_BITS */
/*
 * gcm_gmult_1bit: table-free bit-at-a-time GF(2^128) multiplication,
 * used when TABLE_BITS==1.  Processes Xi one machine word at a time,
 * one bit per inner-loop iteration, accumulating into Z.
 * NOTE(review): declarations, the accumulate step of the inner loop and
 * closing braces are elided from this listing (embedded numbering jumps
 * e.g. 626->634, 654->662).
 */
621 static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
623 u128 V, Z = { 0, 0 };
626 const long *xi = (const long *)Xi;
634 V.hi = H[0]; /* H is in host byte order, no byte swapping */
637 for (j = 0; j < 16 / sizeof(long); ++j) {
/* Load the next word of Xi as a big-endian value. */
638 if (is_endian.little) {
639 if (sizeof(long) == 8) {
641 X = (long)(BSWAP8(xi[j]));
643 const u8 *p = (const u8 *)(xi + j);
644 X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
647 const u8 *p = (const u8 *)(xi + j);
/* M is an all-ones/all-zeros mask from the current top bit of X. */
653 for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
654 u64 M = (u64)(X >> (8 * sizeof(long) - 1));
/* Write the product back to Xi in big-endian byte order. */
662 if (is_endian.little) {
664 Xi[0] = BSWAP8(Z.hi);
665 Xi[1] = BSWAP8(Z.lo);
669 v = (u32)(Z.hi >> 32);
673 v = (u32)(Z.lo >> 32);
684 # define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/*
 * Platform dispatch: when TABLE_BITS==4 and assembler/CPUID support is
 * compiled in, declare the per-architecture GHASH implementations and
 * define GCM_FUNCREF_4BIT so GCM_MUL/GHASH go through the ctx function
 * pointers chosen at runtime in CRYPTO_gcm128_init.
 * NOTE(review): #else/#endif lines are elided from this listing, so the
 * nesting shown here is incomplete.
 */
688 #if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
689 # if !defined(I386_ONLY) && \
690 (defined(__i386) || defined(__i386__) || \
691 defined(__x86_64) || defined(__x86_64__) || \
692 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
693 # define GHASH_ASM_X86_OR_64
694 # define GCM_FUNCREF_4BIT
695 extern unsigned int OPENSSL_ia32cap_P[2];
697 void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
698 void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
699 void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
/* On 32-bit x86 the AVX entry points alias the CLMUL ones. */
702 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
703 # define gcm_init_avx gcm_init_clmul
704 # define gcm_gmult_avx gcm_gmult_clmul
705 # define gcm_ghash_avx gcm_ghash_clmul
707 void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
708 void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
709 void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
713 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
714 # define GHASH_ASM_X86
715 void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
716 void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
719 void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
720 void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
723 # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
724 # include "arm_arch.h"
725 # if __ARM_MAX_ARCH__>=7
726 # define GHASH_ASM_ARM
727 # define GCM_FUNCREF_4BIT
728 # define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
729 # if defined(__arm__) || defined(__arm)
730 # define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
732 void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
733 void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
734 void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
736 void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
737 void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
738 void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
741 # elif defined(__sparc__) || defined(__sparc)
742 # include "sparc_arch.h"
743 # define GHASH_ASM_SPARC
744 # define GCM_FUNCREF_4BIT
745 extern unsigned int OPENSSL_sparcv9cap_P[];
746 void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
747 void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
748 void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
750 # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
751 # include "ppc_arch.h"
752 # define GHASH_ASM_PPC
753 # define GCM_FUNCREF_4BIT
754 void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
755 void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
756 void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
/* Route GCM_MUL/GHASH through the per-context function pointers. */
761 #ifdef GCM_FUNCREF_4BIT
763 # define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
766 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
/*
 * CRYPTO_gcm128_init: zero the context, compute the hash key
 * H = E_K(0^128) with the supplied block cipher, store it in host byte
 * order, then select and initialize the fastest available GHASH
 * implementation for this CPU (CLMUL/AVX, MMX/x86, NEON/PMULL, VIS3,
 * POWER8, or the portable 4-bit/8-bit table code).
 * NOTE(review): #else/#endif lines and some declarations are elided from
 * this listing, so the branch structure shown is incomplete.
 */
770 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
779 memset(ctx, 0, sizeof(*ctx));
/* H = E_K(0): ctx->H.c was zeroed by the memset above. */
783 (*block) (ctx->H.c, ctx->H.c, key);
785 if (is_endian.little) {
786 /* H is stored in host byte order */
788 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
789 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
793 hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
794 lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
800 gcm_init_8bit(ctx->Htable, ctx->H.u);
802 # if defined(GHASH_ASM_X86_OR_64)
803 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
804 if (OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
805 OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
806 if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
807 gcm_init_avx(ctx->Htable, ctx->H.u);
808 ctx->gmult = gcm_gmult_avx;
809 ctx->ghash = gcm_ghash_avx;
811 gcm_init_clmul(ctx->Htable, ctx->H.u);
812 ctx->gmult = gcm_gmult_clmul;
813 ctx->ghash = gcm_ghash_clmul;
818 gcm_init_4bit(ctx->Htable, ctx->H.u);
819 # if defined(GHASH_ASM_X86) /* x86 only */
820 # if defined(OPENSSL_IA32_SSE2)
821 if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
823 if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
825 ctx->gmult = gcm_gmult_4bit_mmx;
826 ctx->ghash = gcm_ghash_4bit_mmx;
828 ctx->gmult = gcm_gmult_4bit_x86;
829 ctx->ghash = gcm_ghash_4bit_x86;
832 ctx->gmult = gcm_gmult_4bit;
833 ctx->ghash = gcm_ghash_4bit;
835 # elif defined(GHASH_ASM_ARM)
836 # ifdef PMULL_CAPABLE
838 gcm_init_v8(ctx->Htable, ctx->H.u);
839 ctx->gmult = gcm_gmult_v8;
840 ctx->ghash = gcm_ghash_v8;
845 gcm_init_neon(ctx->Htable, ctx->H.u);
846 ctx->gmult = gcm_gmult_neon;
847 ctx->ghash = gcm_ghash_neon;
851 gcm_init_4bit(ctx->Htable, ctx->H.u);
852 ctx->gmult = gcm_gmult_4bit;
853 ctx->ghash = gcm_ghash_4bit;
855 # elif defined(GHASH_ASM_SPARC)
856 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
857 gcm_init_vis3(ctx->Htable, ctx->H.u);
858 ctx->gmult = gcm_gmult_vis3;
859 ctx->ghash = gcm_ghash_vis3;
861 gcm_init_4bit(ctx->Htable, ctx->H.u);
862 ctx->gmult = gcm_gmult_4bit;
863 ctx->ghash = gcm_ghash_4bit;
865 # elif defined(GHASH_ASM_PPC)
866 if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
867 gcm_init_p8(ctx->Htable, ctx->H.u);
868 ctx->gmult = gcm_gmult_p8;
869 ctx->ghash = gcm_ghash_p8;
871 gcm_init_4bit(ctx->Htable, ctx->H.u);
872 ctx->gmult = gcm_gmult_4bit;
873 ctx->ghash = gcm_ghash_4bit;
876 gcm_init_4bit(ctx->Htable, ctx->H.u);
/*
 * CRYPTO_gcm128_setiv: reset per-message state and derive the initial
 * counter block Y0 from the IV.  A 96-bit IV is copied directly (with the
 * counter set elsewhere); any other length is GHASHed into Yi together
 * with the IV bit length.  Finally EK0 = E_K(Y0) is computed for the tag.
 * NOTE(review): loop/branch framing and several statements are elided from
 * this listing (embedded numbering jumps e.g. 892->899, 905->913).
 */
881 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
891 #ifdef GCM_FUNCREF_4BIT
892 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
899 ctx->len.u[0] = 0; /* AAD length */
900 ctx->len.u[1] = 0; /* message length */
/* 96-bit IV fast path: Y0 = IV || counter. */
905 memcpy(ctx->Yi.c, iv, 12);
/* Non-96-bit IV: absorb full 16-byte chunks of the IV into Yi... */
913 for (i = 0; i < 16; ++i)
914 ctx->Yi.c[i] ^= iv[i];
/* ...then the final partial chunk. */
920 for (i = 0; i < len; ++i)
921 ctx->Yi.c[i] ^= iv[i];
/* Mix in the IV length in bits (len0), big-endian, and GHASH once more. */
925 if (is_endian.little) {
927 ctx->Yi.u[1] ^= BSWAP8(len0);
929 ctx->Yi.c[8] ^= (u8)(len0 >> 56);
930 ctx->Yi.c[9] ^= (u8)(len0 >> 48);
931 ctx->Yi.c[10] ^= (u8)(len0 >> 40);
932 ctx->Yi.c[11] ^= (u8)(len0 >> 32);
933 ctx->Yi.c[12] ^= (u8)(len0 >> 24);
934 ctx->Yi.c[13] ^= (u8)(len0 >> 16);
935 ctx->Yi.c[14] ^= (u8)(len0 >> 8);
936 ctx->Yi.c[15] ^= (u8)(len0);
939 ctx->Yi.u[1] ^= len0;
/* Extract the 32-bit counter from the last word of Yi. */
943 if (is_endian.little)
945 ctr = BSWAP4(ctx->Yi.d[3])
947 ctr = GETU32(ctx->Yi.c + 12);
/* EK0 = E_K(Y0), kept for the final tag computation. */
953 (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
/* Write the (incremented elsewhere) counter back into Yi. */
955 if (is_endian.little)
957 ctx->Yi.d[3] = BSWAP4(ctr);
959 PUTU32(ctx->Yi.c + 12, ctr);
/*
 * CRYPTO_gcm128_aad: absorb additional authenticated data into the GHASH
 * state Xi.  Must be called before any encrypt/decrypt data.  Enforces the
 * GCM AAD length limit (2^64 bits, i.e. 2^61 bytes) and handles a partial
 * block left over from a previous call.  Returns 0 on success (error
 * returns are elided from this listing).
 * NOTE(review): the partial-block bookkeeping (n/ares handling) and GHASH
 * calls between the visible lines are elided (numbering jumps 990->1002).
 */
965 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
970 u64 alen = ctx->len.u[0];
971 #ifdef GCM_FUNCREF_4BIT
972 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
974 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
975 const u8 *inp, size_t len) = ctx->ghash;
/* Overflow/limit check on the accumulated AAD length. */
983 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
985 ctx->len.u[0] = alen;
/* Finish a partial block left from a previous call. */
990 ctx->Xi.c[n] ^= *(aad++);
/* Bulk path: process all whole 16-byte blocks at once. */
1002 if ((i = (len & (size_t)-16))) {
1009 for (i = 0; i < 16; ++i)
1010 ctx->Xi.c[i] ^= aad[i];
/* Buffer the trailing partial block into Xi. */
1017 n = (unsigned int)len;
1018 for (i = 0; i < len; ++i)
1019 ctx->Xi.c[i] ^= aad[i];
/*
 * CRYPTO_gcm128_encrypt: CTR-encrypt `len` bytes from `in` to `out` and
 * fold the ciphertext into the GHASH state Xi.  Enforces the GCM plaintext
 * limit (2^36 - 32 bytes), finalizes GHASH(AAD) on first use, then runs a
 * fast word-at-a-time path (with GHASH_CHUNK striding when a streamed
 * GHASH is available) and a byte-at-a-time fallback.
 * NOTE(review): #else/#endif lines, closing braces and several statements
 * are elided from this listing (embedded numbering jumps throughout), so
 * the branch structure shown is incomplete.
 */
1026 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
1027 const unsigned char *in, unsigned char *out,
1036 unsigned int n, ctr;
1038 u64 mlen = ctx->len.u[1];
1039 block128_f block = ctx->block;
1040 void *key = ctx->key;
1041 #ifdef GCM_FUNCREF_4BIT
1042 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1044 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1045 const u8 *inp, size_t len) = ctx->ghash;
1050 n = (unsigned int)mlen % 16; /* alternative to ctx->mres */
/* Message-length limit and overflow check. */
1053 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1055 ctx->len.u[1] = mlen;
1058 /* First call to encrypt finalizes GHASH(AAD) */
/* Read the 32-bit counter out of Yi. */
1063 if (is_endian.little)
1065 ctr = BSWAP4(ctx->Yi.d[3]);
1067 ctr = GETU32(ctx->Yi.c + 12);
1073 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1074 if (16 % sizeof(size_t) == 0) { /* always true actually */
/* Drain a partial keystream block left from a previous call. */
1078 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1089 # if defined(STRICT_ALIGNMENT)
1090 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1093 # if defined(GHASH) && defined(GHASH_CHUNK)
/* Encrypt a whole chunk, then GHASH it while still in L1 cache. */
1094 while (len >= GHASH_CHUNK) {
1095 size_t j = GHASH_CHUNK;
1098 size_t *out_t = (size_t *)out;
1099 const size_t *in_t = (const size_t *)in;
1101 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1103 if (is_endian.little)
1105 ctx->Yi.d[3] = BSWAP4(ctr);
1107 PUTU32(ctx->Yi.c + 12, ctr);
1111 for (i = 0; i < 16 / sizeof(size_t); ++i)
1112 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1117 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
/* Remaining whole blocks (less than a chunk). */
1120 if ((i = (len & (size_t)-16))) {
1124 size_t *out_t = (size_t *)out;
1125 const size_t *in_t = (const size_t *)in;
1127 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1129 if (is_endian.little)
1131 ctx->Yi.d[3] = BSWAP4(ctr);
1133 PUTU32(ctx->Yi.c + 12, ctr);
1137 for (i = 0; i < 16 / sizeof(size_t); ++i)
1138 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1143 GHASH(ctx, out - j, j);
/* No streamed GHASH: fold each ciphertext block into Xi inline. */
1147 size_t *out_t = (size_t *)out;
1148 const size_t *in_t = (const size_t *)in;
1150 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1152 if (is_endian.little)
1154 ctx->Yi.d[3] = BSWAP4(ctr);
1156 PUTU32(ctx->Yi.c + 12, ctr);
1160 for (i = 0; i < 16 / sizeof(size_t); ++i)
1161 ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
/* Final partial block: fresh keystream, byte-by-byte. */
1169 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1171 if (is_endian.little)
1173 ctx->Yi.d[3] = BSWAP4(ctr);
1175 PUTU32(ctx->Yi.c + 12, ctr);
1180 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
/* OPENSSL_SMALL_FOOTPRINT fallback: strictly byte-at-a-time. */
1190 for (i = 0; i < len; ++i) {
1192 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1194 if (is_endian.little)
1196 ctx->Yi.d[3] = BSWAP4(ctr);
1198 PUTU32(ctx->Yi.c + 12, ctr);
1203 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt: mirror of CRYPTO_gcm128_encrypt — the key
 * difference is ordering: ciphertext is folded into GHASH (Xi) BEFORE it
 * is XORed with the keystream, since GHASH authenticates the ciphertext.
 * NOTE(review): #else/#endif lines, closing braces and several statements
 * are elided from this listing (embedded numbering jumps throughout), so
 * the branch structure shown is incomplete.
 */
1213 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1214 const unsigned char *in, unsigned char *out,
1223 unsigned int n, ctr;
1225 u64 mlen = ctx->len.u[1];
1226 block128_f block = ctx->block;
1227 void *key = ctx->key;
1228 #ifdef GCM_FUNCREF_4BIT
1229 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1231 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1232 const u8 *inp, size_t len) = ctx->ghash;
/* Message-length limit and overflow check. */
1237 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1239 ctx->len.u[1] = mlen;
1242 /* First call to decrypt finalizes GHASH(AAD) */
1247 if (is_endian.little)
1249 ctr = BSWAP4(ctx->Yi.d[3]);
1251 ctr = GETU32(ctx->Yi.c + 12);
1257 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1258 if (16 % sizeof(size_t) == 0) { /* always true actually */
/* Drain a partial keystream block left from a previous call. */
1263 *(out++) = c ^ ctx->EKi.c[n];
1275 # if defined(STRICT_ALIGNMENT)
1276 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1279 # if defined(GHASH) && defined(GHASH_CHUNK)
/* GHASH the ciphertext chunk first, then decrypt it. */
1280 while (len >= GHASH_CHUNK) {
1281 size_t j = GHASH_CHUNK;
1283 GHASH(ctx, in, GHASH_CHUNK);
1285 size_t *out_t = (size_t *)out;
1286 const size_t *in_t = (const size_t *)in;
1288 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1290 if (is_endian.little)
1292 ctx->Yi.d[3] = BSWAP4(ctr);
1294 PUTU32(ctx->Yi.c + 12, ctr);
1298 for (i = 0; i < 16 / sizeof(size_t); ++i)
1299 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
/* Remaining whole blocks (less than a chunk). */
1306 if ((i = (len & (size_t)-16))) {
1309 size_t *out_t = (size_t *)out;
1310 const size_t *in_t = (const size_t *)in;
1312 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1314 if (is_endian.little)
1316 ctx->Yi.d[3] = BSWAP4(ctr);
1318 PUTU32(ctx->Yi.c + 12, ctr);
1322 for (i = 0; i < 16 / sizeof(size_t); ++i)
1323 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
/* No streamed GHASH: absorb ciphertext into Xi while decrypting. */
1331 size_t *out_t = (size_t *)out;
1332 const size_t *in_t = (const size_t *)in;
1334 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1336 if (is_endian.little)
1338 ctx->Yi.d[3] = BSWAP4(ctr);
1340 PUTU32(ctx->Yi.c + 12, ctr);
1344 for (i = 0; i < 16 / sizeof(size_t); ++i) {
1346 out[i] = c ^ ctx->EKi.t[i];
/* Final partial block: fresh keystream, byte-by-byte. */
1356 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1358 if (is_endian.little)
1360 ctx->Yi.d[3] = BSWAP4(ctr);
1362 PUTU32(ctx->Yi.c + 12, ctr);
1369 out[n] = c ^ ctx->EKi.c[n];
/* OPENSSL_SMALL_FOOTPRINT fallback: strictly byte-at-a-time. */
1379 for (i = 0; i < len; ++i) {
1382 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1384 if (is_endian.little)
1386 ctx->Yi.d[3] = BSWAP4(ctr);
1388 PUTU32(ctx->Yi.c + 12, ctr);
1394 out[i] = c ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_encrypt_ctr32: like CRYPTO_gcm128_encrypt, but the bulk
 * CTR keystream is produced by the caller-supplied `stream` function
 * (a hardware/asm ctr128_f) operating on whole blocks; GHASH of the
 * produced ciphertext follows each stream call.
 * NOTE(review): closing braces, the partial-block bookkeeping and some
 * statements are elided from this listing (embedded numbering jumps
 * e.g. 1442->1450, 1488->1499).
 */
1405 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1406 const unsigned char *in, unsigned char *out,
1407 size_t len, ctr128_f stream)
1415 unsigned int n, ctr;
1417 u64 mlen = ctx->len.u[1];
1418 void *key = ctx->key;
1419 #ifdef GCM_FUNCREF_4BIT
1420 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1422 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1423 const u8 *inp, size_t len) = ctx->ghash;
/* Message-length limit and overflow check. */
1428 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1430 ctx->len.u[1] = mlen;
1433 /* First call to encrypt finalizes GHASH(AAD) */
1438 if (is_endian.little)
1440 ctr = BSWAP4(ctx->Yi.d[3]);
1442 ctr = GETU32(ctx->Yi.c + 12);
/* Drain a partial keystream block left from a previous call. */
1450 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1461 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
/* Stream-encrypt a chunk, bump the counter, then GHASH the output. */
1462 while (len >= GHASH_CHUNK) {
1463 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1464 ctr += GHASH_CHUNK / 16;
1465 if (is_endian.little)
1467 ctx->Yi.d[3] = BSWAP4(ctr);
1469 PUTU32(ctx->Yi.c + 12, ctr);
1473 GHASH(ctx, out, GHASH_CHUNK);
/* Remaining whole blocks. */
1479 if ((i = (len & (size_t)-16))) {
1482 (*stream) (in, out, j, key, ctx->Yi.c);
1483 ctr += (unsigned int)j;
1484 if (is_endian.little)
1486 ctx->Yi.d[3] = BSWAP4(ctr);
1488 PUTU32(ctx->Yi.c + 12, ctr);
/* Without streamed GHASH: fold each ciphertext block into Xi. */
1499 for (i = 0; i < 16; ++i)
1500 ctx->Xi.c[i] ^= out[i];
/* Final partial block via the plain block cipher. */
1507 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1509 if (is_endian.little)
1511 ctx->Yi.d[3] = BSWAP4(ctr);
1513 PUTU32(ctx->Yi.c + 12, ctr);
1518 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt_ctr32: mirror of CRYPTO_gcm128_encrypt_ctr32 —
 * the ciphertext is GHASHed BEFORE being stream-decrypted, since GHASH
 * authenticates the ciphertext.
 * NOTE(review): closing braces, the partial-block bookkeeping and some
 * statements are elided from this listing (embedded numbering jumps
 * e.g. 1564->1573, 1594->1603).
 */
1527 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1528 const unsigned char *in, unsigned char *out,
1529 size_t len, ctr128_f stream)
1537 unsigned int n, ctr;
1539 u64 mlen = ctx->len.u[1];
1540 void *key = ctx->key;
1541 #ifdef GCM_FUNCREF_4BIT
1542 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1544 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1545 const u8 *inp, size_t len) = ctx->ghash;
/* Message-length limit and overflow check. */
1550 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1552 ctx->len.u[1] = mlen;
1555 /* First call to decrypt finalizes GHASH(AAD) */
1560 if (is_endian.little)
1562 ctr = BSWAP4(ctx->Yi.d[3]);
1564 ctr = GETU32(ctx->Yi.c + 12);
/* Drain a partial keystream block left from a previous call. */
1573 *(out++) = c ^ ctx->EKi.c[n];
1585 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
/* GHASH the ciphertext chunk, then stream-decrypt it. */
1586 while (len >= GHASH_CHUNK) {
1587 GHASH(ctx, in, GHASH_CHUNK);
1588 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1589 ctr += GHASH_CHUNK / 16;
1590 if (is_endian.little)
1592 ctx->Yi.d[3] = BSWAP4(ctr);
1594 PUTU32(ctx->Yi.c + 12, ctr);
/* Remaining whole blocks. */
1603 if ((i = (len & (size_t)-16))) {
/* Without streamed GHASH: absorb ciphertext into Xi first. */
1611 for (k = 0; k < 16; ++k)
1612 ctx->Xi.c[k] ^= in[k];
1619 (*stream) (in, out, j, key, ctx->Yi.c);
1620 ctr += (unsigned int)j;
1621 if (is_endian.little)
1623 ctx->Yi.d[3] = BSWAP4(ctr);
1625 PUTU32(ctx->Yi.c + 12, ctr);
/* Final partial block via the plain block cipher. */
1634 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1636 if (is_endian.little)
1638 ctx->Yi.d[3] = BSWAP4(ctr);
1640 PUTU32(ctx->Yi.c + 12, ctr);
1647 out[n] = c ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_finish: complete the GHASH computation by folding in
 * the bit lengths of the AAD (alen) and ciphertext (clen), XOR with
 * the encrypted initial counter block EK0 to form the tag in ctx->Xi,
 * and -- if |tag| is supplied -- compare the first |len| bytes.
 * Returns 0 on tag match (memcmp semantics); flushes any pending
 * partial-block GHASH state first (elided between the visible lines).
 */
1656 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
/* lengths are stored in bytes; GCM hashes them in bits */
1665 u64 alen = ctx->len.u[0] << 3;
1666 u64 clen = ctx->len.u[1] << 3;
1667 #ifdef GCM_FUNCREF_4BIT
1668 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
/* leftover AAD or message residue must be hashed before finishing */
1671 if (ctx->mres || ctx->ares)
/* build the big-endian 128-bit length block; BSWAP8 path for
 * little-endian hosts, GETU32 reconstruction otherwise (elided #ifdef) */
1674 if (is_endian.little) {
1676 alen = BSWAP8(alen);
1677 clen = BSWAP8(clen);
1681 ctx->len.u[0] = alen;
1682 ctx->len.u[1] = clen;
1684 alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1685 clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
/* GHASH the length block ... */
1689 ctx->Xi.u[0] ^= alen;
1690 ctx->Xi.u[1] ^= clen;
/* ... then mask with E(K, Y0) to produce the authentication tag */
1693 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1694 ctx->Xi.u[1] ^= ctx->EK0.u[1];
/* NOTE(review): memcmp is not constant-time; modern OpenSSL uses
 * CRYPTO_memcmp here to avoid a tag-comparison timing side channel --
 * consider the same change when the full source is in view */
1696 if (tag && len <= sizeof(ctx->Xi))
1697 return memcmp(ctx->Xi.c, tag, len);
/*
 * CRYPTO_gcm128_tag: finalize the GCM computation (no verification,
 * tag==NULL) and copy out at most sizeof(ctx->Xi) (= 16) bytes of the
 * resulting tag into |tag|.
 */
1702 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1704 CRYPTO_gcm128_finish(ctx, NULL, 0);
/* clamp the caller-requested length to the tag size */
1705 memcpy(tag, ctx->Xi.c,
1706 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
/*
 * CRYPTO_gcm128_new: heap-allocate a GCM128_CONTEXT and initialize it
 * with |key| and the block cipher |block|.  Presumably returns NULL on
 * allocation failure (the return statement is elided from this view).
 */
1709 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1711 GCM128_CONTEXT *ret;
1713 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1714 CRYPTO_gcm128_init(ret, key, block);
/*
 * CRYPTO_gcm128_release: scrub the context (it holds key schedule
 * material in Htable/EKi/EK0) before releasing it; the matching
 * OPENSSL_free call is elided from this view.
 */
1719 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1722 OPENSSL_cleanse(ctx, sizeof(*ctx));
/*
 * Built-in self test: known-answer vectors.  These match the standard
 * GCM test cases (McGrew & Viega / NIST).  The #define aliases tying
 * e.g. K2 to K1, and the closing braces of the arrays, are elided from
 * this view.
 */
1727 #if defined(SELFTEST)
1729 # include <openssl/aes.h>
/* test case #1: AES-128 all-zero key/IV, empty plaintext and AAD */
1732 static const u8 K1[16], *P1 = NULL, *A1 = NULL, IV1[12], *C1 = NULL;
1733 static const u8 T1[] = {
1734 0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, 0x30, 0x61,
1735 0x36, 0x7f, 0x1d, 0x57, 0xa4, 0xe7, 0x45, 0x5a
/* test case #2: same key/IV, one zero plaintext block */
1742 static const u8 P2[16];
1743 static const u8 C2[] = {
1744 0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92,
1745 0xf3, 0x28, 0xc2, 0xb9, 0x71, 0xb2, 0xfe, 0x78
1748 static const u8 T2[] = {
1749 0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec, 0x13, 0xbd,
1750 0xf5, 0x3a, 0x67, 0xb2, 0x12, 0x57, 0xbd, 0xdf
/* test case #3: AES-128 key, 96-bit IV, 64-byte plaintext, no AAD */
1755 static const u8 K3[] = {
1756 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1757 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
1760 static const u8 P3[] = {
1761 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1762 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1763 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1764 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1765 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1766 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1767 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1768 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1771 static const u8 IV3[] = {
1772 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1773 0xde, 0xca, 0xf8, 0x88
1776 static const u8 C3[] = {
1777 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1778 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1779 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1780 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1781 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1782 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1783 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1784 0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85
1787 static const u8 T3[] = {
1788 0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd, 0x64, 0xa6,
1789 0x2c, 0xf3, 0x5a, 0xbd, 0x2b, 0xa6, 0xfa, 0xb4
/* test case #4: same key/IV as #3 (aliases elided), 60-byte plaintext
 * plus 20 bytes of AAD */
1795 static const u8 P4[] = {
1796 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1797 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1798 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1799 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1800 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1801 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1802 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1803 0xba, 0x63, 0x7b, 0x39
1806 static const u8 A4[] = {
1807 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1808 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1809 0xab, 0xad, 0xda, 0xd2
1812 static const u8 C4[] = {
1813 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1814 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1815 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1816 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1817 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1818 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1819 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1820 0x3d, 0x58, 0xe0, 0x91
1823 static const u8 T4[] = {
1824 0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21, 0xa5, 0xdb,
1825 0x94, 0xfa, 0xe9, 0x5a, 0xe7, 0x12, 0x1a, 0x47
/* test case #5: as #4 but with a short (64-bit) IV, exercising the
 * GHASH-based IV derivation path */
1832 static const u8 IV5[] = {
1833 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad
1836 static const u8 C5[] = {
1837 0x61, 0x35, 0x3b, 0x4c, 0x28, 0x06, 0x93, 0x4a,
1838 0x77, 0x7f, 0xf5, 0x1f, 0xa2, 0x2a, 0x47, 0x55,
1839 0x69, 0x9b, 0x2a, 0x71, 0x4f, 0xcd, 0xc6, 0xf8,
1840 0x37, 0x66, 0xe5, 0xf9, 0x7b, 0x6c, 0x74, 0x23,
1841 0x73, 0x80, 0x69, 0x00, 0xe4, 0x9f, 0x24, 0xb2,
1842 0x2b, 0x09, 0x75, 0x44, 0xd4, 0x89, 0x6b, 0x42,
1843 0x49, 0x89, 0xb5, 0xe1, 0xeb, 0xac, 0x0f, 0x07,
1844 0xc2, 0x3f, 0x45, 0x98
1847 static const u8 T5[] = {
1848 0x36, 0x12, 0xd2, 0xe7, 0x9e, 0x3b, 0x07, 0x85,
1849 0x56, 0x1b, 0xe1, 0x4a, 0xac, 0xa2, 0xfc, 0xcb
/* test case #6: as #4 but with a long (60-byte) IV */
1856 static const u8 IV6[] = {
1857 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
1858 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
1859 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
1860 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
1861 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
1862 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
1863 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
1864 0xa6, 0x37, 0xb3, 0x9b
1867 static const u8 C6[] = {
1868 0x8c, 0xe2, 0x49, 0x98, 0x62, 0x56, 0x15, 0xb6,
1869 0x03, 0xa0, 0x33, 0xac, 0xa1, 0x3f, 0xb8, 0x94,
1870 0xbe, 0x91, 0x12, 0xa5, 0xc3, 0xa2, 0x11, 0xa8,
1871 0xba, 0x26, 0x2a, 0x3c, 0xca, 0x7e, 0x2c, 0xa7,
1872 0x01, 0xe4, 0xa9, 0xa4, 0xfb, 0xa4, 0x3c, 0x90,
1873 0xcc, 0xdc, 0xb2, 0x81, 0xd4, 0x8c, 0x7c, 0x6f,
1874 0xd6, 0x28, 0x75, 0xd2, 0xac, 0xa4, 0x17, 0x03,
1875 0x4c, 0x34, 0xae, 0xe5
1878 static const u8 T6[] = {
1879 0x61, 0x9c, 0xc5, 0xae, 0xff, 0xfe, 0x0b, 0xfa,
1880 0x46, 0x2a, 0xf4, 0x3c, 0x16, 0x99, 0xd0, 0x50
/* test case #7: AES-192 all-zero key/IV, empty plaintext and AAD */
1884 static const u8 K7[24], *P7 = NULL, *A7 = NULL, IV7[12], *C7 = NULL;
1885 static const u8 T7[] = {
1886 0xcd, 0x33, 0xb2, 0x8a, 0xc7, 0x73, 0xf7, 0x4b,
1887 0xa0, 0x0e, 0xd1, 0xf3, 0x12, 0x57, 0x24, 0x35
/* test case #8: AES-192 zero key/IV, one zero plaintext block */
1894 static const u8 P8[16];
1895 static const u8 C8[] = {
1896 0x98, 0xe7, 0x24, 0x7c, 0x07, 0xf0, 0xfe, 0x41,
1897 0x1c, 0x26, 0x7e, 0x43, 0x84, 0xb0, 0xf6, 0x00
1900 static const u8 T8[] = {
1901 0x2f, 0xf5, 0x8d, 0x80, 0x03, 0x39, 0x27, 0xab,
1902 0x8e, 0xf4, 0xd4, 0x58, 0x75, 0x14, 0xf0, 0xfb
/* test case #9: AES-192 key, 96-bit IV, 64-byte plaintext, no AAD */
1907 static const u8 K9[] = {
1908 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1909 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
1910 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c
1913 static const u8 P9[] = {
1914 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1915 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1916 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1917 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1918 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1919 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1920 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1921 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1924 static const u8 IV9[] = {
1925 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1926 0xde, 0xca, 0xf8, 0x88
1929 static const u8 C9[] = {
1930 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1931 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1932 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1933 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1934 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1935 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1936 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1937 0xcc, 0xda, 0x27, 0x10, 0xac, 0xad, 0xe2, 0x56
1940 static const u8 T9[] = {
1941 0x99, 0x24, 0xa7, 0xc8, 0x58, 0x73, 0x36, 0xbf,
1942 0xb1, 0x18, 0x02, 0x4d, 0xb8, 0x67, 0x4a, 0x14
/* test case #10: as #9 with 60-byte plaintext plus 20 bytes of AAD */
1948 static const u8 P10[] = {
1949 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1950 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1951 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1952 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1953 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1954 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1955 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1956 0xba, 0x63, 0x7b, 0x39
1959 static const u8 A10[] = {
1960 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1961 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1962 0xab, 0xad, 0xda, 0xd2
1965 static const u8 C10[] = {
1966 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1967 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1968 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1969 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1970 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1971 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1972 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1973 0xcc, 0xda, 0x27, 0x10
1976 static const u8 T10[] = {
1977 0x25, 0x19, 0x49, 0x8e, 0x80, 0xf1, 0x47, 0x8f,
1978 0x37, 0xba, 0x55, 0xbd, 0x6d, 0x27, 0x61, 0x8c
/* test case #11: AES-192 variant of #10 with a short (64-bit) IV */
1985 static const u8 IV11[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
1987 static const u8 C11[] = {
1988 0x0f, 0x10, 0xf5, 0x99, 0xae, 0x14, 0xa1, 0x54,
1989 0xed, 0x24, 0xb3, 0x6e, 0x25, 0x32, 0x4d, 0xb8,
1990 0xc5, 0x66, 0x63, 0x2e, 0xf2, 0xbb, 0xb3, 0x4f,
1991 0x83, 0x47, 0x28, 0x0f, 0xc4, 0x50, 0x70, 0x57,
1992 0xfd, 0xdc, 0x29, 0xdf, 0x9a, 0x47, 0x1f, 0x75,
1993 0xc6, 0x65, 0x41, 0xd4, 0xd4, 0xda, 0xd1, 0xc9,
1994 0xe9, 0x3a, 0x19, 0xa5, 0x8e, 0x8b, 0x47, 0x3f,
1995 0xa0, 0xf0, 0x62, 0xf7
1998 static const u8 T11[] = {
1999 0x65, 0xdc, 0xc5, 0x7f, 0xcf, 0x62, 0x3a, 0x24,
2000 0x09, 0x4f, 0xcc, 0xa4, 0x0d, 0x35, 0x33, 0xf8
/* test case #12: AES-192 variant with a long (60-byte) IV */
2007 static const u8 IV12[] = {
2008 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2009 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2010 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2011 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2012 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2013 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2014 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2015 0xa6, 0x37, 0xb3, 0x9b
2018 static const u8 C12[] = {
2019 0xd2, 0x7e, 0x88, 0x68, 0x1c, 0xe3, 0x24, 0x3c,
2020 0x48, 0x30, 0x16, 0x5a, 0x8f, 0xdc, 0xf9, 0xff,
2021 0x1d, 0xe9, 0xa1, 0xd8, 0xe6, 0xb4, 0x47, 0xef,
2022 0x6e, 0xf7, 0xb7, 0x98, 0x28, 0x66, 0x6e, 0x45,
2023 0x81, 0xe7, 0x90, 0x12, 0xaf, 0x34, 0xdd, 0xd9,
2024 0xe2, 0xf0, 0x37, 0x58, 0x9b, 0x29, 0x2d, 0xb3,
2025 0xe6, 0x7c, 0x03, 0x67, 0x45, 0xfa, 0x22, 0xe7,
2026 0xe9, 0xb7, 0x37, 0x3b
2029 static const u8 T12[] = {
2030 0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb,
2031 0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9
/* test case #13: AES-256 all-zero key/IV, empty plaintext and AAD */
2035 static const u8 K13[32], *P13 = NULL, *A13 = NULL, IV13[12], *C13 = NULL;
2036 static const u8 T13[] = {
2037 0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9,
2038 0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b
/* test case #14: AES-256 zero key/IV, one zero plaintext block */
2044 static const u8 P14[16], IV14[12];
2045 static const u8 C14[] = {
2046 0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, 0x6b, 0x6e,
2047 0x07, 0x4e, 0xc5, 0xd3, 0xba, 0xf3, 0x9d, 0x18
2050 static const u8 T14[] = {
2051 0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99, 0x6b, 0xf0,
2052 0x26, 0x5b, 0x98, 0xb5, 0xd4, 0x8a, 0xb9, 0x19
/* test case #15: AES-256 key, 96-bit IV, 64-byte plaintext, no AAD */
2057 static const u8 K15[] = {
2058 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2059 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
2060 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2061 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
2064 static const u8 P15[] = {
2065 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2066 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2067 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2068 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2069 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2070 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2071 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2072 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
2075 static const u8 IV15[] = {
2076 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
2077 0xde, 0xca, 0xf8, 0x88
2080 static const u8 C15[] = {
2081 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2082 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2083 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2084 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2085 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2086 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2087 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2088 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2091 static const u8 T15[] = {
2092 0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34, 0x71, 0xbd,
2093 0xec, 0x1a, 0x50, 0x22, 0x70, 0xe3, 0xcc, 0x6c
/* test case #16: as #15 with 60-byte plaintext plus 20 bytes of AAD */
2099 static const u8 P16[] = {
2100 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2101 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2102 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2103 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2104 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2105 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2106 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2107 0xba, 0x63, 0x7b, 0x39
2110 static const u8 A16[] = {
2111 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2112 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2113 0xab, 0xad, 0xda, 0xd2
2116 static const u8 C16[] = {
2117 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2118 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2119 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2120 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2121 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2122 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2123 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2124 0xbc, 0xc9, 0xf6, 0x62
2127 static const u8 T16[] = {
2128 0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68,
2129 0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
/* test case #17: AES-256 variant of #16 with a short (64-bit) IV */
2136 static const u8 IV17[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
2138 static const u8 C17[] = {
2139 0xc3, 0x76, 0x2d, 0xf1, 0xca, 0x78, 0x7d, 0x32,
2140 0xae, 0x47, 0xc1, 0x3b, 0xf1, 0x98, 0x44, 0xcb,
2141 0xaf, 0x1a, 0xe1, 0x4d, 0x0b, 0x97, 0x6a, 0xfa,
2142 0xc5, 0x2f, 0xf7, 0xd7, 0x9b, 0xba, 0x9d, 0xe0,
2143 0xfe, 0xb5, 0x82, 0xd3, 0x39, 0x34, 0xa4, 0xf0,
2144 0x95, 0x4c, 0xc2, 0x36, 0x3b, 0xc7, 0x3f, 0x78,
2145 0x62, 0xac, 0x43, 0x0e, 0x64, 0xab, 0xe4, 0x99,
2146 0xf4, 0x7c, 0x9b, 0x1f
2149 static const u8 T17[] = {
2150 0x3a, 0x33, 0x7d, 0xbf, 0x46, 0xa7, 0x92, 0xc4,
2151 0x5e, 0x45, 0x49, 0x13, 0xfe, 0x2e, 0xa8, 0xf2
/* test case #18: AES-256 variant with a long (60-byte) IV */
2158 static const u8 IV18[] = {
2159 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2160 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2161 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2162 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2163 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2164 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2165 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2166 0xa6, 0x37, 0xb3, 0x9b
2169 static const u8 C18[] = {
2170 0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1,
2171 0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20,
2172 0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19,
2173 0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4,
2174 0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45,
2175 0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde,
2176 0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e,
2177 0x44, 0xae, 0x7e, 0x3f
2180 static const u8 T18[] = {
2181 0xa4, 0x4a, 0x82, 0x66, 0xee, 0x1c, 0x8e, 0xb0,
2182 0xc8, 0xb5, 0xd4, 0xcf, 0x5a, 0xe9, 0xf1, 0x9a
/* test case #19: AAD-only vector (no plaintext); presumably exercises
 * the GHASH-over-AAD path -- aliases for key/IV are elided */
2190 static const u8 A19[] = {
2191 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2192 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2193 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2194 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2195 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2196 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2197 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2198 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55,
2199 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2200 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2201 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2202 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2203 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2204 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2205 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2206 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2209 static const u8 T19[] = {
2210 0x5f, 0xea, 0x79, 0x3a, 0x2d, 0x6f, 0x97, 0x4d,
2211 0x37, 0xe6, 0x8e, 0x0c, 0xb8, 0xff, 0x94, 0x92
/* test case #20: regression vector for 32-bit counter wrap -- the IV
 * below makes the counter LSB start at 0xff so incrementing crosses a
 * byte (and eventually word) boundary during the 288-byte message */
2217 /* this results in 0xff in counter LSB */
2218 static const u8 IV20[64] = { 0xff, 0xff, 0xff, 0xff };
2220 static const u8 P20[288];
2221 static const u8 C20[] = {
2222 0x56, 0xb3, 0x37, 0x3c, 0xa9, 0xef, 0x6e, 0x4a,
2223 0x2b, 0x64, 0xfe, 0x1e, 0x9a, 0x17, 0xb6, 0x14,
2224 0x25, 0xf1, 0x0d, 0x47, 0xa7, 0x5a, 0x5f, 0xce,
2225 0x13, 0xef, 0xc6, 0xbc, 0x78, 0x4a, 0xf2, 0x4f,
2226 0x41, 0x41, 0xbd, 0xd4, 0x8c, 0xf7, 0xc7, 0x70,
2227 0x88, 0x7a, 0xfd, 0x57, 0x3c, 0xca, 0x54, 0x18,
2228 0xa9, 0xae, 0xff, 0xcd, 0x7c, 0x5c, 0xed, 0xdf,
2229 0xc6, 0xa7, 0x83, 0x97, 0xb9, 0xa8, 0x5b, 0x49,
2230 0x9d, 0xa5, 0x58, 0x25, 0x72, 0x67, 0xca, 0xab,
2231 0x2a, 0xd0, 0xb2, 0x3c, 0xa4, 0x76, 0xa5, 0x3c,
2232 0xb1, 0x7f, 0xb4, 0x1c, 0x4b, 0x8b, 0x47, 0x5c,
2233 0xb4, 0xf3, 0xf7, 0x16, 0x50, 0x94, 0xc2, 0x29,
2234 0xc9, 0xe8, 0xc4, 0xdc, 0x0a, 0x2a, 0x5f, 0xf1,
2235 0x90, 0x3e, 0x50, 0x15, 0x11, 0x22, 0x13, 0x76,
2236 0xa1, 0xcd, 0xb8, 0x36, 0x4c, 0x50, 0x61, 0xa2,
2237 0x0c, 0xae, 0x74, 0xbc, 0x4a, 0xcd, 0x76, 0xce,
2238 0xb0, 0xab, 0xc9, 0xfd, 0x32, 0x17, 0xef, 0x9f,
2239 0x8c, 0x90, 0xbe, 0x40, 0x2d, 0xdf, 0x6d, 0x86,
2240 0x97, 0xf4, 0xf8, 0x80, 0xdf, 0xf1, 0x5b, 0xfb,
2241 0x7a, 0x6b, 0x28, 0x24, 0x1e, 0xc8, 0xfe, 0x18,
2242 0x3c, 0x2d, 0x59, 0xe3, 0xf9, 0xdf, 0xff, 0x65,
2243 0x3c, 0x71, 0x26, 0xf0, 0xac, 0xb9, 0xe6, 0x42,
2244 0x11, 0xf4, 0x2b, 0xae, 0x12, 0xaf, 0x46, 0x2b,
2245 0x10, 0x70, 0xbe, 0xf1, 0xab, 0x5e, 0x36, 0x06,
2246 0x87, 0x2c, 0xa1, 0x0d, 0xee, 0x15, 0xb3, 0x24,
2247 0x9b, 0x1a, 0x1b, 0x95, 0x8f, 0x23, 0x13, 0x4c,
2248 0x4b, 0xcc, 0xb7, 0xd0, 0x32, 0x00, 0xbc, 0xe4,
2249 0x20, 0xa2, 0xf8, 0xeb, 0x66, 0xdc, 0xf3, 0x64,
2250 0x4d, 0x14, 0x23, 0xc1, 0xb5, 0x69, 0x90, 0x03,
2251 0xc1, 0x3e, 0xce, 0xf4, 0xbf, 0x38, 0xa3, 0xb6,
2252 0x0e, 0xed, 0xc3, 0x40, 0x33, 0xba, 0xc1, 0x90,
2253 0x27, 0x83, 0xdc, 0x6d, 0x89, 0xe2, 0xe7, 0x74,
2254 0x18, 0x8a, 0x43, 0x9c, 0x7e, 0xbc, 0xc0, 0x67,
2255 0x2d, 0xbd, 0xa4, 0xdd, 0xcf, 0xb2, 0x79, 0x46,
2256 0x13, 0xb0, 0xbe, 0x41, 0x31, 0x5e, 0xf7, 0x78,
2257 0x70, 0x8a, 0x70, 0xee, 0x7d, 0x75, 0x16, 0x5c
2260 static const u8 T20[] = {
2261 0x8b, 0x30, 0x7f, 0x6b, 0x33, 0x28, 0x6d, 0x0a,
2262 0xb0, 0x26, 0xa9, 0xed, 0x3f, 0xe1, 0xe8, 0x5f
/*
 * TEST_CASE(n): run known-answer test #n in both directions.
 * Expands K##n/IV##n/A##n/P##n/C##n/T##n; encrypts P##n and checks the
 * ciphertext against C##n and the tag against T##n, then re-keys the
 * IV and decrypts C##n back, checking plaintext and tag again.  NULL
 * A/P/C pointers (empty-input cases) skip the corresponding step.
 * Increments |ret| on any mismatch.  The closing "} while(0)" of the
 * macro appears elided from this view.
 */
2265 # define TEST_CASE(n) do { \
2266 u8 out[sizeof(P##n)]; \
2267 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
2268 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
2269 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
2270 memset(out,0,sizeof(out)); \
2271 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
2272 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
2273 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
2274 (C##n && memcmp(out,C##n,sizeof(out)))) \
2275 ret++, printf ("encrypt test#%d failed.\n",n); \
2276 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
2277 memset(out,0,sizeof(out)); \
2278 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
2279 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
2280 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
2281 (P##n && memcmp(out,P##n,sizeof(out)))) \
2282 ret++, printf ("decrypt test#%d failed.\n",n); \
/*
 * Fragment of the SELFTEST benchmark (inside main, whose header is
 * outside this view; only compiled when OPENSSL_CPUID_OBJ provides
 * OPENSSL_rdtsc).  Times one-shot GCM encryption vs raw CTR over the
 * same buffer to isolate the GHASH cost, then times GHASH alone.
 * Cycle counts are printed per byte.
 */
2312 # ifdef OPENSSL_CPUID_OBJ
2314 size_t start, stop, gcm_t, ctr_t, OPENSSL_rdtsc();
/* warm up, then measure one full GCM pass over |buf| */
2321 AES_set_encrypt_key(K1, sizeof(K1) * 8, &key);
2322 CRYPTO_gcm128_init(&ctx, &key, (block128_f) AES_encrypt);
2323 CRYPTO_gcm128_setiv(&ctx, IV1, sizeof(IV1));
2325 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2326 start = OPENSSL_rdtsc();
2327 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2328 gcm_t = OPENSSL_rdtsc() - start;
/* same buffer through plain CTR (warm-up call, then timed call) */
2330 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2331 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2332 (block128_f) AES_encrypt);
2333 start = OPENSSL_rdtsc();
2334 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2335 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2336 (block128_f) AES_encrypt);
2337 ctr_t = OPENSSL_rdtsc() - start;
/* report cycles/byte: GCM, CTR, and their difference (~GHASH cost) */
2339 printf("%.2f-%.2f=%.2f\n",
2340 gcm_t / (double)sizeof(buf),
2341 ctr_t / (double)sizeof(buf),
2342 (gcm_t - ctr_t) / (double)sizeof(buf));
/* GHASH-only timing: 100 iterations, averaged per byte */
2345 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
2346 const u8 *inp, size_t len) = ctx.ghash;
2348 GHASH((&ctx), buf.c, sizeof(buf));
2349 start = OPENSSL_rdtsc();
2350 for (i = 0; i < 100; ++i)
2351 GHASH((&ctx), buf.c, sizeof(buf));
2352 gcm_t = OPENSSL_rdtsc() - start;
2353 printf("%.2f\n", gcm_t / (double)sizeof(buf) / (double)i);