1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
50 #include <openssl/crypto.h>
51 #include "modes_lcl.h"
/*
 * When the platform requires strict alignment but provides a BSWAP4
 * primitive, GETU32/PUTU32 can be a single byte-swapped word access,
 * because the callers in this file guarantee aligned operands.
 */
61 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
62 /* redefine, because alignment is ensured */
64 # define GETU32(p) BSWAP4(*(const u32 *)(p))
66 # define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
/*
 * PACK places a 16-bit constant in the top 16 bits of a size_t, so the
 * same reduction tables below work for both 32- and 64-bit size_t.
 */
69 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * REDUCE1BIT(V): divide the 128-bit element V by x in GF(2^128), i.e.
 * shift V right one bit and, if a 1 was shifted out, fold in the GCM
 * reduction polynomial (0xE1 in the top byte).  The sizeof(size_t)
 * test selects a 64-bit or 32-bit constant at compile time.
 * NOTE(review): part of this macro body (the else branch header and
 * closing lines) is elided in this excerpt.
 */
70 #define REDUCE1BIT(V) do { \
71 if (sizeof(size_t)==8) { \
72 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
73 V.lo = (V.hi<<63)|(V.lo>>1); \
74 V.hi = (V.hi>>1 )^T; \
77 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
78 V.lo = (V.hi<<63)|(V.lo>>1); \
79 V.hi = (V.hi>>1 )^((u64)T<<32); \
84 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
85 * never be set to 8. 8 is effectively reserved for testing purposes.
86 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
87 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
88 * whole spectrum of possible table driven implementations. Why? In
89 * non-"Shoup's" case memory access pattern is segmented in such manner,
90 * that it's trivial to see that cache timing information can reveal
91 * fair portion of intermediate hash value. Given that ciphertext is
92 * always available to attacker, it's possible for him to attempt to
93 * deduce secret parameter H and if successful, tamper with messages
94 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
95 * not as trivial, but there is no reason to believe that it's resistant
96 * to cache-timing attack. And the thing about "8-bit" implementation is
97 * that it consumes 16 (sixteen) times more memory, 4KB per individual
98 * key + 1KB shared. Well, on pros side it should be twice as fast as
99 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
100 * was observed to run ~75% faster, closer to 100% for commercial
101 * compilers... Yet "4-bit" procedure is preferred, because it's
102 * believed to provide better security-performance balance and adequate
103 * all-round performance. "All-round" refers to things like:
105 * - shorter setup time effectively improves overall timing for
106 * handling short messages;
107 * - larger table allocation can become unbearable because of VM
108 * subsystem penalties (for example on Windows a large enough free
109 * results in VM working-set trimming, meaning that a subsequent
110 * malloc would immediately incur working-set expansion);
111 * - larger table has larger cache footprint, which can affect
112 * performance of other code paths (not necessarily even from same
113 * thread in Hyper-Threading world);
115 * Value of 1 is not appropriate for performance reasons.
/*
 * gcm_init_8bit: build the 256-entry per-key table for the "8-bit
 * Shoup" GHASH variant.  On return Htable[i] holds i*H in GF(2^128)
 * (Htable[128] is seeded with H itself; smaller powers of two are
 * derived by repeated REDUCE1BIT halving, then the remaining entries
 * are formed by XOR of already-computed ones).
 * NOTE(review): several lines of this function are elided in this
 * excerpt; comments describe only the visible code.
 */
119 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
/* Seed Htable[128] = H, then derive Htable[64], [32], ... [1]. */
129 for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
/* Non-power-of-two entries: Htable[i+j] = Htable[i] ^ Htable[j]. */
134 for (i = 2; i < 256; i <<= 1) {
135 u128 *Hi = Htable + i, H0 = *Hi;
136 for (j = 1; j < i; ++j) {
137 Hi[j].hi = H0.hi ^ Htable[j].hi;
138 Hi[j].lo = H0.lo ^ Htable[j].lo;
/*
 * gcm_gmult_8bit: multiply the hash state Xi by the subkey H in
 * GF(2^128), in place, using the 256-entry table built by
 * gcm_init_8bit.  Xi is processed one byte at a time starting from the
 * last byte; the result is written back to Xi in big-endian order
 * (byte-swapped on little-endian hosts).
 * NOTE(review): several lines of this function are elided in this
 * excerpt; comments describe only the visible code.
 */
143 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
146 const u8 *xi = (const u8 *)Xi + 15;
/*
 * rem_8bit[b] is the GF(2^128) reduction constant for a byte b shifted
 * out of the low end; PACK positions it in the top 16 bits of size_t.
 */
154 static const size_t rem_8bit[256] = {
155 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
156 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
157 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
158 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
159 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
160 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
161 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
162 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
163 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
164 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
165 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
166 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
167 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
168 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
169 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
170 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
171 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
172 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
173 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
174 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
175 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
176 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
177 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
178 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
179 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
180 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
181 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
182 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
183 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
184 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
185 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
186 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
187 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
188 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
189 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
190 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
191 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
192 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
193 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
194 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
195 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
196 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
197 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
198 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
199 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
200 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
201 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
202 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
203 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
204 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
205 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
206 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
207 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
208 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
209 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
210 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
211 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
212 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
213 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
214 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
215 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
216 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
217 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
218 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
/* Accumulate the table entry for the current byte n of Xi into Z. */
222 Z.hi ^= Htable[n].hi;
223 Z.lo ^= Htable[n].lo;
/*
 * Shift Z right by 8 bits; the shifted-out low byte selects the
 * reduction constant from rem_8bit, XORed into the top of Z.hi.
 */
230 rem = (size_t)Z.lo & 0xff;
231 Z.lo = (Z.hi << 56) | (Z.lo >> 8);
/* On 32-bit size_t, PACK put the constant 32 bits lower, hence << 32. */
233 if (sizeof(size_t) == 8)
234 Z.hi ^= rem_8bit[rem];
236 Z.hi ^= (u64)rem_8bit[rem] << 32;
/* Store the result back into Xi in big-endian byte order. */
239 if (is_endian.little) {
241 Xi[0] = BSWAP8(Z.hi);
242 Xi[1] = BSWAP8(Z.lo);
246 v = (u32)(Z.hi >> 32);
250 v = (u32)(Z.lo >> 32);
/* Bind GCM_MUL to the 8-bit path when TABLE_BITS==8. */
261 # define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
/*
 * gcm_init_4bit: build the 16-entry per-key table for the "4-bit
 * Shoup" GHASH variant; Htable[i] ends up holding i*H in GF(2^128).
 * The small-footprint build derives all entries by REDUCE1BIT halving
 * plus a fill loop, the default build unrolls the XOR combinations.
 * NOTE(review): several lines of this function are elided in this
 * excerpt; comments describe only the visible code.
 */
265 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
268 # if defined(OPENSSL_SMALL_FOOTPRINT)
277 # if defined(OPENSSL_SMALL_FOOTPRINT)
/* Seed Htable[8] = H, then derive Htable[4], [2], [1] by halving. */
278 for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
/* Non-power-of-two entries: Htable[i+j] = Htable[i] ^ Htable[j]. */
283 for (i = 2; i < 16; i <<= 1) {
284 u128 *Hi = Htable + i;
286 for (V = *Hi, j = 1; j < i; ++j) {
287 Hi[j].hi = V.hi ^ Htable[j].hi;
288 Hi[j].lo = V.lo ^ Htable[j].lo;
/* Unrolled equivalent of the fill loop above (default build). */
299 Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
301 Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
302 Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
303 Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
305 Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
306 Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
307 Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
308 Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
309 Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
310 Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
311 Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
/* ARM assembly GHASH wants the 64-bit halves in a specific word order. */
313 # if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
315 * ARM assembler expects specific dword order in Htable.
326 if (is_endian.little)
327 for (j = 0; j < 16; ++j) {
332 for (j = 0; j < 16; ++j) {
/* Big-endian case: swap the 32-bit halves within each 64-bit word. */
334 Htable[j].hi = V.lo << 32 | V.lo >> 32;
335 Htable[j].lo = V.hi << 32 | V.hi >> 32;
/*
 * rem_4bit[n] is the GF(2^128) reduction constant for a 4-bit nibble n
 * shifted out of the low end of the hash state; PACK positions the
 * 16-bit value in the top bits of size_t for both 32/64-bit builds.
 */
342 static const size_t rem_4bit[16] = {
343 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
344 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
345 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
346 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
/*
 * gcm_gmult_4bit: multiply the hash state Xi by the subkey H in
 * GF(2^128), in place, 4 bits at a time using Htable (from
 * gcm_init_4bit) and rem_4bit.  Bytes of Xi are consumed from last to
 * first, low nibble (nlo) then high nibble (nhi); the result is
 * written back in big-endian order.
 * NOTE(review): several lines of this function are elided in this
 * excerpt; comments describe only the visible code.
 */
349 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
353 size_t rem, nlo, nhi;
/* Start with the last byte of Xi; its low nibble seeds Z. */
361 nlo = ((const u8 *)Xi)[15];
365 Z.hi = Htable[nlo].hi;
366 Z.lo = Htable[nlo].lo;
/* Shift Z right 4 bits, folding the shifted-out nibble via rem_4bit. */
369 rem = (size_t)Z.lo & 0xf;
370 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
372 if (sizeof(size_t) == 8)
373 Z.hi ^= rem_4bit[rem];
/* 32-bit size_t keeps the constant 32 bits lower, hence << 32. */
375 Z.hi ^= (u64)rem_4bit[rem] << 32;
377 Z.hi ^= Htable[nhi].hi;
378 Z.lo ^= Htable[nhi].lo;
/* Main loop body: next byte of Xi, same shift-reduce-accumulate. */
383 nlo = ((const u8 *)Xi)[cnt];
387 rem = (size_t)Z.lo & 0xf;
388 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
390 if (sizeof(size_t) == 8)
391 Z.hi ^= rem_4bit[rem];
393 Z.hi ^= (u64)rem_4bit[rem] << 32;
395 Z.hi ^= Htable[nlo].hi;
396 Z.lo ^= Htable[nlo].lo;
/* Store the result back into Xi in big-endian byte order. */
399 if (is_endian.little) {
401 Xi[0] = BSWAP8(Z.hi);
402 Xi[1] = BSWAP8(Z.lo);
406 v = (u32)(Z.hi >> 32);
410 v = (u32)(Z.lo >> 32);
421 # if !defined(OPENSSL_SMALL_FOOTPRINT)
423 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
424 * details... Compiler-generated code doesn't seem to give any
425 * performance improvement, at least not on x86[_64]. It's here
426 * mostly as reference and a placeholder for possible future
427 * non-trivial optimization[s]...
/*
 * gcm_ghash_4bit: streamed GHASH - XOR each 16-byte block of inp into
 * Xi and multiply by H, for len bytes (len is a multiple of 16).
 * Contains two variants: the plain per-nibble loop (same scheme as
 * gcm_gmult_4bit) and an 8-bit-per-iteration variant using the
 * Hshr4/Hshl4 helper tables precomputed below.
 * NOTE(review): several lines of this function are elided in this
 * excerpt; comments describe only the visible code.
 */
429 static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
430 const u8 *inp, size_t len)
434 size_t rem, nlo, nhi;
/* Plain variant: process Xi^block byte-by-byte from the tail. */
445 nlo = ((const u8 *)Xi)[15];
450 Z.hi = Htable[nlo].hi;
451 Z.lo = Htable[nlo].lo;
/* Shift-reduce by 4 bits via rem_4bit, as in gcm_gmult_4bit. */
454 rem = (size_t)Z.lo & 0xf;
455 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
457 if (sizeof(size_t) == 8)
458 Z.hi ^= rem_4bit[rem];
460 Z.hi ^= (u64)rem_4bit[rem] << 32;
462 Z.hi ^= Htable[nhi].hi;
463 Z.lo ^= Htable[nhi].lo;
468 nlo = ((const u8 *)Xi)[cnt];
473 rem = (size_t)Z.lo & 0xf;
474 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
476 if (sizeof(size_t) == 8)
477 Z.hi ^= rem_4bit[rem];
479 Z.hi ^= (u64)rem_4bit[rem] << 32;
481 Z.hi ^= Htable[nlo].hi;
482 Z.lo ^= Htable[nlo].lo;
486 * Extra 256+16 bytes per-key plus 512 bytes shared tables
487 * [should] give ~50% improvement... One could have PACK()-ed
488 * the rem_8bit even here, but the priority is to minimize
491 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
492 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
/* 16-bit (un-PACKed) reduction constants for a full shifted-out byte. */
493 static const unsigned short rem_8bit[256] = {
494 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
495 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
496 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
497 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
498 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
499 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
500 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
501 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
502 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
503 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
504 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
505 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
506 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
507 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
508 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
509 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
510 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
511 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
512 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
513 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
514 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
515 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
516 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
517 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
518 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
519 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
520 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
521 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
522 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
523 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
524 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
525 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
528 * This pre-processing phase slows down procedure by approximately
529 * same time as it makes each loop spin faster. In other words
530 * single block performance is approximately same as straightforward
531 * "4-bit" implementation, and then it goes only faster...
/* Precompute Htable>>4 (Hshr4) and the low nibble of Htable<<4 (Hshl4). */
533 for (cnt = 0; cnt < 16; ++cnt) {
534 Z.hi = Htable[cnt].hi;
535 Z.lo = Htable[cnt].lo;
536 Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
537 Hshr4[cnt].hi = (Z.hi >> 4);
538 Hshl4[cnt] = (u8)(Z.lo << 4);
/* Fast variant: one full byte of Xi^block per iteration, tail first. */
542 for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
543 nlo = ((const u8 *)Xi)[cnt];
548 Z.hi ^= Htable[nlo].hi;
549 Z.lo ^= Htable[nlo].lo;
/* Shift Z right 8 bits; the shifted-out byte is reduced via rem_8bit. */
551 rem = (size_t)Z.lo & 0xff;
553 Z.lo = (Z.hi << 56) | (Z.lo >> 8);
556 Z.hi ^= Hshr4[nhi].hi;
557 Z.lo ^= Hshr4[nhi].lo;
558 Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
/* Final byte (index 0) gets only a 4-bit shift-reduce. */
561 nlo = ((const u8 *)Xi)[0];
566 Z.hi ^= Htable[nlo].hi;
567 Z.lo ^= Htable[nlo].lo;
569 rem = (size_t)Z.lo & 0xf;
571 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
574 Z.hi ^= Htable[nhi].hi;
575 Z.lo ^= Htable[nhi].lo;
576 Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
/* Store the updated hash state back into Xi in big-endian order. */
579 if (is_endian.little) {
581 Xi[0] = BSWAP8(Z.hi);
582 Xi[1] = BSWAP8(Z.lo);
586 v = (u32)(Z.hi >> 32);
590 v = (u32)(Z.lo >> 32);
/* Advance to the next 16-byte block until len is exhausted. */
599 } while (inp += 16, len -= 16);
/*
 * When GHASH_ASM is defined the 4-bit routines above are provided by
 * assembly modules instead; only the prototypes are needed here.
 */
603 void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
604 void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
/* GCM_MUL / GHASH bind the generic code below to the 4-bit routines. */
608 # define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
609 # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
610 # define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
612 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing
613 * effect. In other words idea is to hash data while it's still in L1 cache
614 * after encryption pass...
616 # define GHASH_CHUNK (3*1024)
619 #else /* TABLE_BITS */
/*
 * gcm_gmult_1bit: table-free, bit-at-a-time GF(2^128) multiplication
 * of Xi by H (TABLE_BITS==1 build).  Each bit of Xi conditionally
 * accumulates V (a running multiple of H) into Z.
 * NOTE(review): several lines of this function are elided in this
 * excerpt; comments describe only the visible code.
 */
621 static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
623 u128 V, Z = { 0, 0 };
626 const long *xi = (const long *)Xi;
634 V.hi = H[0]; /* H is in host byte order, no byte swapping */
/* Load each long of Xi as a big-endian value into X. */
637 for (j = 0; j < 16 / sizeof(long); ++j) {
638 if (is_endian.little) {
639 if (sizeof(long) == 8) {
641 X = (long)(BSWAP8(xi[j]));
643 const u8 *p = (const u8 *)(xi + j);
644 X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
647 const u8 *p = (const u8 *)(xi + j);
/* M is an all-ones mask when the current top bit of X is set. */
653 for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
654 u64 M = (u64)(X >> (8 * sizeof(long) - 1));
/* Store the product back into Xi in big-endian byte order. */
662 if (is_endian.little) {
664 Xi[0] = BSWAP8(Z.hi);
665 Xi[1] = BSWAP8(Z.lo);
669 v = (u32)(Z.hi >> 32);
673 v = (u32)(Z.lo >> 32);
/* Bind GCM_MUL to the 1-bit path; note it uses H directly, no Htable. */
684 # define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/*
 * Platform dispatch: declare the assembly GHASH back-ends available on
 * this target and define GCM_FUNCREF_4BIT, which makes GCM_MUL/GHASH
 * go through the ctx->gmult / ctx->ghash function pointers selected at
 * runtime in CRYPTO_gcm128_init.
 * NOTE(review): some preprocessor branches are elided in this excerpt.
 */
688 #if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
689 # if !defined(I386_ONLY) && \
690 (defined(__i386) || defined(__i386__) || \
691 defined(__x86_64) || defined(__x86_64__) || \
692 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
693 # define GHASH_ASM_X86_OR_64
694 # define GCM_FUNCREF_4BIT
695 extern unsigned int OPENSSL_ia32cap_P[2];
/* PCLMULQDQ (carry-less multiply) implementations. */
697 void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
698 void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
699 void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
/* 32-bit x86 has no separate AVX path; alias it to the clmul one. */
702 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
703 # define gcm_init_avx gcm_init_clmul
704 # define gcm_gmult_avx gcm_gmult_clmul
705 # define gcm_ghash_avx gcm_ghash_clmul
707 void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
708 void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
709 void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
/* x86-only scalar/MMX assembly fallbacks. */
713 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
714 # define GHASH_ASM_X86
715 void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
716 void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
719 void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
720 void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
/* ARM: NEON and ARMv8 PMULL implementations, gated on runtime caps. */
723 # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
724 # include "arm_arch.h"
725 # if __ARM_MAX_ARCH__>=7
726 # define GHASH_ASM_ARM
727 # define GCM_FUNCREF_4BIT
728 # define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
729 # if defined(__arm__) || defined(__arm)
730 # define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
732 void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
733 void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
734 void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
736 void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
737 void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
738 void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
/* SPARC VIS3 implementation. */
741 # elif defined(__sparc__) || defined(__sparc)
742 # include "sparc_arch.h"
743 # define GHASH_ASM_SPARC
744 # define GCM_FUNCREF_4BIT
745 extern unsigned int OPENSSL_sparcv9cap_P[];
746 void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
747 void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
748 void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
/* POWER8 implementation. */
750 # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
751 # include "ppc_arch.h"
752 # define GHASH_ASM_PPC
753 # define GCM_FUNCREF_4BIT
754 void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
755 void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
756 void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
/*
 * With function references enabled, GCM_MUL/GHASH dispatch through
 * local gcm_gmult_p / gcm_ghash_p pointers captured from ctx by each
 * caller.
 */
761 #ifdef GCM_FUNCREF_4BIT
763 # define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
766 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
/*
 * CRYPTO_gcm128_init: zero the context, derive the hash subkey
 * H = E_K(0^128) with the supplied block cipher, convert it to host
 * byte order, and select/initialize the best GHASH implementation
 * (table build + gmult/ghash function pointers) for this CPU.
 * NOTE(review): several lines are elided in this excerpt; comments
 * describe only the visible code.
 */
770 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
779 memset(ctx, 0, sizeof(*ctx));
/* H = E_K(0): ctx->H.c is all-zero after the memset above. */
783 (*block) (ctx->H.c, ctx->H.c, key);
785 if (is_endian.little) {
786 /* H is stored in host byte order */
788 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
789 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
/* No BSWAP8 available: assemble the two 64-bit halves from bytes. */
793 hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
794 lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
800 gcm_init_8bit(ctx->Htable, ctx->H.u);
/* Runtime CPU dispatch for the 4-bit/assembly builds. */
802 # if defined(GHASH_ASM_X86_OR_64)
803 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
804 if (OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
805 OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
806 if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
807 gcm_init_avx(ctx->Htable, ctx->H.u);
808 ctx->gmult = gcm_gmult_avx;
809 ctx->ghash = gcm_ghash_avx;
811 gcm_init_clmul(ctx->Htable, ctx->H.u);
812 ctx->gmult = gcm_gmult_clmul;
813 ctx->ghash = gcm_ghash_clmul;
818 gcm_init_4bit(ctx->Htable, ctx->H.u);
819 # if defined(GHASH_ASM_X86) /* x86 only */
820 # if defined(OPENSSL_IA32_SSE2)
821 if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
823 if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
825 ctx->gmult = gcm_gmult_4bit_mmx;
826 ctx->ghash = gcm_ghash_4bit_mmx;
828 ctx->gmult = gcm_gmult_4bit_x86;
829 ctx->ghash = gcm_ghash_4bit_x86;
832 ctx->gmult = gcm_gmult_4bit;
833 ctx->ghash = gcm_ghash_4bit;
835 # elif defined(GHASH_ASM_ARM)
836 # ifdef PMULL_CAPABLE
838 gcm_init_v8(ctx->Htable, ctx->H.u);
839 ctx->gmult = gcm_gmult_v8;
840 ctx->ghash = gcm_ghash_v8;
845 gcm_init_neon(ctx->Htable, ctx->H.u);
846 ctx->gmult = gcm_gmult_neon;
847 ctx->ghash = gcm_ghash_neon;
851 gcm_init_4bit(ctx->Htable, ctx->H.u);
852 ctx->gmult = gcm_gmult_4bit;
854 ctx->ghash = gcm_ghash_4bit;
859 # elif defined(GHASH_ASM_SPARC)
860 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
861 gcm_init_vis3(ctx->Htable, ctx->H.u);
862 ctx->gmult = gcm_gmult_vis3;
863 ctx->ghash = gcm_ghash_vis3;
865 gcm_init_4bit(ctx->Htable, ctx->H.u);
866 ctx->gmult = gcm_gmult_4bit;
867 ctx->ghash = gcm_ghash_4bit;
869 # elif defined(GHASH_ASM_PPC)
870 if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
871 gcm_init_p8(ctx->Htable, ctx->H.u);
872 ctx->gmult = gcm_gmult_p8;
873 ctx->ghash = gcm_ghash_p8;
875 gcm_init_4bit(ctx->Htable, ctx->H.u);
876 ctx->gmult = gcm_gmult_4bit;
878 ctx->ghash = gcm_ghash_4bit;
/* Generic fallback when no assembly back-end applies. */
884 gcm_init_4bit(ctx->Htable, ctx->H.u);
/*
 * CRYPTO_gcm128_setiv: reset per-message state and derive the initial
 * counter block Y0 from the IV.  A 96-bit IV is used directly (with
 * counter = 1, set in elided code); any other length is GHASHed
 * together with its bit length per the GCM spec.  Also precomputes
 * EK0 = E_K(Y0), later used to mask the tag.
 * NOTE(review): several lines are elided in this excerpt; comments
 * describe only the visible code.
 */
889 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
899 #ifdef GCM_FUNCREF_4BIT
900 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
907 ctx->len.u[0] = 0; /* AAD length */
908 ctx->len.u[1] = 0; /* message length */
/* Fast path: 96-bit IV is copied straight into Yi. */
913 memcpy(ctx->Yi.c, iv, 12);
/* Non-96-bit IV: absorb full 16-byte chunks of the IV into Yi. */
921 for (i = 0; i < 16; ++i)
922 ctx->Yi.c[i] ^= iv[i];
/* ...then the partial tail chunk. */
928 for (i = 0; i < len; ++i)
929 ctx->Yi.c[i] ^= iv[i];
/* Finally absorb the IV bit-length (len0) into the low half of Yi. */
933 if (is_endian.little) {
935 ctx->Yi.u[1] ^= BSWAP8(len0);
/* Byte-wise fallback when BSWAP8 is unavailable. */
937 ctx->Yi.c[8] ^= (u8)(len0 >> 56);
938 ctx->Yi.c[9] ^= (u8)(len0 >> 48);
939 ctx->Yi.c[10] ^= (u8)(len0 >> 40);
940 ctx->Yi.c[11] ^= (u8)(len0 >> 32);
941 ctx->Yi.c[12] ^= (u8)(len0 >> 24);
942 ctx->Yi.c[13] ^= (u8)(len0 >> 16);
943 ctx->Yi.c[14] ^= (u8)(len0 >> 8);
944 ctx->Yi.c[15] ^= (u8)(len0);
947 ctx->Yi.u[1] ^= len0;
/* Read the 32-bit counter from the last word of Yi (big-endian). */
951 if (is_endian.little)
953 ctr = BSWAP4(ctx->Yi.d[3]);
955 ctr = GETU32(ctx->Yi.c + 12);
/* EK0 = E_K(Y0); used by CRYPTO_gcm128_finish to mask the tag. */
961 (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
/* Write the (incremented, in elided code) counter back into Yi. */
963 if (is_endian.little)
965 ctx->Yi.d[3] = BSWAP4(ctr);
967 PUTU32(ctx->Yi.c + 12, ctr);
/*
 * CRYPTO_gcm128_aad: absorb additional authenticated data into the
 * GHASH state Xi.  Must be called before any encrypt/decrypt calls.
 * Returns non-zero (in elided code) when the accumulated AAD length
 * exceeds the GCM limit of 2^61 bytes (2^64 bits).
 * NOTE(review): several lines are elided in this excerpt; comments
 * describe only the visible code.
 */
973 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
978 u64 alen = ctx->len.u[0];
979 #ifdef GCM_FUNCREF_4BIT
980 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
982 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
983 const u8 *inp, size_t len) = ctx->ghash;
/* Enforce the 2^61-byte AAD limit and detect 64-bit length overflow. */
991 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
993 ctx->len.u[0] = alen;
/* Fill up a partially-consumed Xi block left over from a prior call. */
998 ctx->Xi.c[n] ^= *(aad++);
/* Bulk path: hash all whole 16-byte blocks at once. */
1010 if ((i = (len & (size_t)-16))) {
/* Per-block fallback when no bulk GHASH is available. */
1017 for (i = 0; i < 16; ++i)
1018 ctx->Xi.c[i] ^= aad[i];
/* Buffer the trailing partial block in Xi; n remembers how much. */
1025 n = (unsigned int)len;
1026 for (i = 0; i < len; ++i)
1027 ctx->Xi.c[i] ^= aad[i];
/*
 * CRYPTO_gcm128_encrypt: CTR-encrypt len bytes from in to out using
 * ctx->block directly (no stream cipher callback), folding the
 * ciphertext into the GHASH state Xi as it is produced.  Returns
 * non-zero (in elided code) when the 2^36-32 byte message limit is
 * exceeded.
 * NOTE(review): several lines are elided in this excerpt; comments
 * describe only the visible code.
 */
1034 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
1035 const unsigned char *in, unsigned char *out,
1044 unsigned int n, ctr;
1046 u64 mlen = ctx->len.u[1];
1047 block128_f block = ctx->block;
1048 void *key = ctx->key;
1049 #ifdef GCM_FUNCREF_4BIT
1050 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1052 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1053 const u8 *inp, size_t len) = ctx->ghash;
1058 n = (unsigned int)mlen % 16; /* alternative to ctx->mres */
/* Enforce GCM's 2^39-256 bit plaintext limit; detect u64 overflow. */
1061 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1063 ctx->len.u[1] = mlen;
1066 /* First call to encrypt finalizes GHASH(AAD) */
/* Read the 32-bit big-endian counter out of Yi. */
1071 if (is_endian.little)
1073 ctr = BSWAP4(ctx->Yi.d[3]);
1075 ctr = GETU32(ctx->Yi.c + 12);
1081 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1082 if (16 % sizeof(size_t) == 0) { /* always true actually */
/* Drain bytes left over from a previous partial block (keystream EKi). */
1086 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
/* Word-at-a-time paths require size_t-aligned in/out pointers. */
1097 # if defined(STRICT_ALIGNMENT)
1098 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
/* Chunked path: encrypt GHASH_CHUNK bytes, then hash them while hot. */
1101 # if defined(GHASH) && defined(GHASH_CHUNK)
1102 while (len >= GHASH_CHUNK) {
1103 size_t j = GHASH_CHUNK;
1106 size_t *out_t = (size_t *)out;
1107 const size_t *in_t = (const size_t *)in;
1109 (*block) (ctx->Yi.c, ctx->EKi.c, key);
/* Write the incremented counter back into Yi for the next block. */
1111 if (is_endian.little)
1113 ctx->Yi.d[3] = BSWAP4(ctr);
1115 PUTU32(ctx->Yi.c + 12, ctr);
1119 for (i = 0; i < 16 / sizeof(size_t); ++i)
1120 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1125 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
/* Remaining whole blocks (less than a chunk). */
1128 if ((i = (len & (size_t)-16))) {
1132 size_t *out_t = (size_t *)out;
1133 const size_t *in_t = (const size_t *)in;
1135 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1137 if (is_endian.little)
1139 ctx->Yi.d[3] = BSWAP4(ctr);
1141 PUTU32(ctx->Yi.c + 12, ctr);
1145 for (i = 0; i < 16 / sizeof(size_t); ++i)
1146 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1151 GHASH(ctx, out - j, j);
/* No bulk GHASH: hash each block into Xi as it is encrypted. */
1155 size_t *out_t = (size_t *)out;
1156 const size_t *in_t = (const size_t *)in;
1158 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1160 if (is_endian.little)
1162 ctx->Yi.d[3] = BSWAP4(ctr);
1164 PUTU32(ctx->Yi.c + 12, ctr);
1168 for (i = 0; i < 16 / sizeof(size_t); ++i)
1169 ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
/* Trailing partial block: generate one keystream block, use n bytes. */
1177 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1179 if (is_endian.little)
1181 ctx->Yi.d[3] = BSWAP4(ctr);
1183 PUTU32(ctx->Yi.c + 12, ctr);
1188 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
/* Small-footprint / unaligned fallback: strictly byte-at-a-time. */
1198 for (i = 0; i < len; ++i) {
1200 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1202 if (is_endian.little)
1204 ctx->Yi.d[3] = BSWAP4(ctr);
1206 PUTU32(ctx->Yi.c + 12, ctr);
1211 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt: mirror of CRYPTO_gcm128_encrypt - CTR-decrypt
 * len bytes from in to out, folding the CIPHERTEXT (i.e. the input)
 * into the GHASH state Xi.  Note the hashing happens before/from the
 * input, unlike encryption where the output is hashed.
 * NOTE(review): several lines are elided in this excerpt; comments
 * describe only the visible code.
 */
1221 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1222 const unsigned char *in, unsigned char *out,
1231 unsigned int n, ctr;
1233 u64 mlen = ctx->len.u[1];
1234 block128_f block = ctx->block;
1235 void *key = ctx->key;
1236 #ifdef GCM_FUNCREF_4BIT
1237 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1239 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1240 const u8 *inp, size_t len) = ctx->ghash;
/* Enforce GCM's message-length limit; detect u64 overflow. */
1245 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1247 ctx->len.u[1] = mlen;
1250 /* First call to decrypt finalizes GHASH(AAD) */
/* Read the 32-bit big-endian counter out of Yi. */
1255 if (is_endian.little)
1257 ctr = BSWAP4(ctx->Yi.d[3]);
1259 ctr = GETU32(ctx->Yi.c + 12);
1265 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1266 if (16 % sizeof(size_t) == 0) { /* always true actually */
/* Drain leftover keystream bytes from a previous partial block. */
1271 *(out++) = c ^ ctx->EKi.c[n];
1283 # if defined(STRICT_ALIGNMENT)
1284 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
/* Chunked path: hash ciphertext first, then decrypt it. */
1287 # if defined(GHASH) && defined(GHASH_CHUNK)
1288 while (len >= GHASH_CHUNK) {
1289 size_t j = GHASH_CHUNK;
1291 GHASH(ctx, in, GHASH_CHUNK);
1293 size_t *out_t = (size_t *)out;
1294 const size_t *in_t = (const size_t *)in;
1296 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1298 if (is_endian.little)
1300 ctx->Yi.d[3] = BSWAP4(ctr);
1302 PUTU32(ctx->Yi.c + 12, ctr);
1306 for (i = 0; i < 16 / sizeof(size_t); ++i)
1307 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
/* Remaining whole blocks (less than a chunk). */
1314 if ((i = (len & (size_t)-16))) {
1317 size_t *out_t = (size_t *)out;
1318 const size_t *in_t = (const size_t *)in;
1320 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1322 if (is_endian.little)
1324 ctx->Yi.d[3] = BSWAP4(ctr);
1326 PUTU32(ctx->Yi.c + 12, ctr);
1330 for (i = 0; i < 16 / sizeof(size_t); ++i)
1331 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
/* No bulk GHASH: absorb each ciphertext word into Xi while decrypting. */
1339 size_t *out_t = (size_t *)out;
1340 const size_t *in_t = (const size_t *)in;
1342 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1344 if (is_endian.little)
1346 ctx->Yi.d[3] = BSWAP4(ctr);
1348 PUTU32(ctx->Yi.c + 12, ctr);
1352 for (i = 0; i < 16 / sizeof(size_t); ++i) {
1354 out[i] = c ^ ctx->EKi.t[i];
/* Trailing partial block: one keystream block, n bytes consumed. */
1364 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1366 if (is_endian.little)
1368 ctx->Yi.d[3] = BSWAP4(ctr);
1370 PUTU32(ctx->Yi.c + 12, ctr);
1377 out[n] = c ^ ctx->EKi.c[n];
/* Small-footprint / unaligned fallback: strictly byte-at-a-time. */
1387 for (i = 0; i < len; ++i) {
1390 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1392 if (is_endian.little)
1394 ctx->Yi.d[3] = BSWAP4(ctr);
1396 PUTU32(ctx->Yi.c + 12, ctr);
1402 out[i] = c ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_encrypt_ctr32: like CRYPTO_gcm128_encrypt but drives a
 * caller-supplied ctr128_f stream routine that encrypts many counter
 * blocks per call (e.g. AES-NI CTR), then GHASHes the produced
 * ciphertext.  Only the low 32 bits of the counter are incremented.
 * NOTE(review): several lines are elided in this excerpt; comments
 * describe only the visible code.
 */
1413 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1414 const unsigned char *in, unsigned char *out,
1415 size_t len, ctr128_f stream)
1423 unsigned int n, ctr;
1425 u64 mlen = ctx->len.u[1];
1426 void *key = ctx->key;
1427 #ifdef GCM_FUNCREF_4BIT
1428 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1430 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1431 const u8 *inp, size_t len) = ctx->ghash;
/* Enforce GCM's message-length limit; detect u64 overflow. */
1436 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1438 ctx->len.u[1] = mlen;
1441 /* First call to encrypt finalizes GHASH(AAD) */
/* Read the 32-bit big-endian counter out of Yi. */
1446 if (is_endian.little)
1448 ctr = BSWAP4(ctx->Yi.d[3]);
1450 ctr = GETU32(ctx->Yi.c + 12);
/* Drain keystream bytes left over from a previous partial block. */
1458 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
/* Chunked path: stream-encrypt GHASH_CHUNK bytes, then hash them. */
1469 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1470 while (len >= GHASH_CHUNK) {
1471 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1472 ctr += GHASH_CHUNK / 16;
1473 if (is_endian.little)
1475 ctx->Yi.d[3] = BSWAP4(ctr);
1477 PUTU32(ctx->Yi.c + 12, ctr);
1481 GHASH(ctx, out, GHASH_CHUNK);
/* Remaining whole blocks: j = number of 16-byte blocks. */
1487 if ((i = (len & (size_t)-16))) {
1490 (*stream) (in, out, j, key, ctx->Yi.c);
1491 ctr += (unsigned int)j;
1492 if (is_endian.little)
1494 ctx->Yi.d[3] = BSWAP4(ctr);
1496 PUTU32(ctx->Yi.c + 12, ctr);
/* No bulk GHASH: fold each ciphertext block into Xi individually. */
1507 for (i = 0; i < 16; ++i)
1508 ctx->Xi.c[i] ^= out[i];
/* Trailing partial block: one keystream block via ctx->block. */
1515 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1517 if (is_endian.little)
1519 ctx->Yi.d[3] = BSWAP4(ctr);
1521 PUTU32(ctx->Yi.c + 12, ctr);
1526 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt_ctr32: mirror of CRYPTO_gcm128_encrypt_ctr32 -
 * GHASH the ciphertext (the input) first, then decrypt it with the
 * caller-supplied ctr128_f stream routine.  Only the low 32 bits of
 * the counter are incremented.
 * NOTE(review): several lines are elided in this excerpt; comments
 * describe only the visible code.
 */
1535 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1536 const unsigned char *in, unsigned char *out,
1537 size_t len, ctr128_f stream)
1545 unsigned int n, ctr;
1547 u64 mlen = ctx->len.u[1];
1548 void *key = ctx->key;
1549 #ifdef GCM_FUNCREF_4BIT
1550 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1552 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1553 const u8 *inp, size_t len) = ctx->ghash;
/* Enforce GCM's message-length limit; detect u64 overflow. */
1558 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1560 ctx->len.u[1] = mlen;
1563 /* First call to decrypt finalizes GHASH(AAD) */
/* Read the 32-bit big-endian counter out of Yi. */
1568 if (is_endian.little)
1570 ctr = BSWAP4(ctx->Yi.d[3]);
1572 ctr = GETU32(ctx->Yi.c + 12);
/* Drain keystream bytes left over from a previous partial block. */
1581 *(out++) = c ^ ctx->EKi.c[n];
/* Chunked path: hash the ciphertext chunk, then stream-decrypt it. */
1593 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1594 while (len >= GHASH_CHUNK) {
1595 GHASH(ctx, in, GHASH_CHUNK);
1596 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1597 ctr += GHASH_CHUNK / 16;
1598 if (is_endian.little)
1600 ctx->Yi.d[3] = BSWAP4(ctr);
1602 PUTU32(ctx->Yi.c + 12, ctr);
/* Remaining whole blocks. */
1611 if ((i = (len & (size_t)-16))) {
/* No bulk GHASH: fold each ciphertext block into Xi before decrypting. */
1619 for (k = 0; k < 16; ++k)
1620 ctx->Xi.c[k] ^= in[k];
1627 (*stream) (in, out, j, key, ctx->Yi.c);
1628 ctr += (unsigned int)j;
1629 if (is_endian.little)
1631 ctx->Yi.d[3] = BSWAP4(ctr);
1633 PUTU32(ctx->Yi.c + 12, ctr);
/* Trailing partial block: one keystream block via ctx->block. */
1642 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1644 if (is_endian.little)
1646 ctx->Yi.d[3] = BSWAP4(ctr);
1648 PUTU32(ctx->Yi.c + 12, ctr);
1655 out[n] = c ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_finish: complete the GHASH computation over
 * len(AAD)||len(C) and XOR in EK0 to form the authentication tag in Xi.
 * If 'tag' is non-NULL it is compared against the computed tag and the
 * memcmp result is returned (0 == match).  NOTE(review): several lines
 * (prologue, the final gmult call, #else branches) are elided here.
 */
1664 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
/* Byte counts -> bit counts, as required by the GCM length block. */
1673 u64 alen = ctx->len.u[0] << 3;
1674 u64 clen = ctx->len.u[1] << 3;
1675 #ifdef GCM_FUNCREF_4BIT
1676 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
/* Pending partial message (mres) or AAD (ares) bytes must be flushed
 * into Xi first (the flush itself is on elided lines). */
1679 if (ctx->mres || ctx->ares)
/* On little-endian hosts byte-swap the lengths to big-endian before
 * hashing them (BSWAP8 path; the GETU32 path below is the fallback). */
1682 if (is_endian.little) {
1684 alen = BSWAP8(alen);
1685 clen = BSWAP8(clen);
1689 ctx->len.u[0] = alen;
1690 ctx->len.u[1] = clen;
/* Fallback big-endian reassembly of the two 64-bit lengths
 * (p presumably points at ctx->len.c -- confirm in full source). */
1692 alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1693 clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
/* Hash the length block: Xi ^= len(A)||len(C), then (elided) gmult. */
1697 ctx->Xi.u[0] ^= alen;
1698 ctx->Xi.u[1] ^= clen;
/* Tag = GHASH result XOR E(K, Y0). */
1701 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1702 ctx->Xi.u[1] ^= ctx->EK0.u[1];
/* NOTE(review): memcmp is not constant-time; later OpenSSL releases use
 * CRYPTO_memcmp here to avoid a timing side channel -- confirm whether
 * that applies to this code base. */
1704 if (tag && len <= sizeof(ctx->Xi))
1705 return memcmp(ctx->Xi.c, tag, len);
/*
 * CRYPTO_gcm128_tag: finalize the GCM computation and copy out up to
 * sizeof(ctx->Xi.c) (16) bytes of authentication tag.  Passing NULL to
 * finish skips the tag comparison; the copy is clamped so a caller
 * requesting more than 16 bytes gets exactly 16.
 */
1710 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1712 CRYPTO_gcm128_finish(ctx, NULL, 0);
1713 memcpy(tag, ctx->Xi.c,
1714 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
/*
 * CRYPTO_gcm128_new: allocate and initialize a GCM128_CONTEXT for the
 * given raw key and block cipher.  The allocation is NULL-checked; on
 * failure init is skipped and (per the elided return line, presumably
 * "return ret;") NULL is returned to the caller.
 */
1717 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1719 GCM128_CONTEXT *ret;
1721 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1722 CRYPTO_gcm128_init(ret, key, block);
/*
 * CRYPTO_gcm128_release: scrub key-derived material (H, Htable, EK0,
 * Xi, ...) before the context is freed.  OPENSSL_cleanse is used rather
 * than memset so the wipe cannot be optimized away; the matching
 * OPENSSL_free call is on an elided line.
 */
1727 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1730 OPENSSL_cleanse(ctx, sizeof(*ctx));
1735 #if defined(SELFTEST)
1737 # include <openssl/aes.h>
/*
 * SELFTEST vectors: the AES-GCM test cases from the GCM specification
 * (McGrew & Viega) / NIST SP 800-38D validation set -- K=key, P=plaintext,
 * A=AAD, IV=nonce, C=expected ciphertext, T=expected tag.  Cases 1-6 are
 * AES-128, 7-12 AES-192, 13-18 AES-256; 19 and 20 are additional stress
 * vectors.  NOTE(review): the extraction drops many lines (closing "};"
 * of arrays and some K/IV/A definitions that alias earlier cases), so
 * the groups are annotated as-is.
 */
/* Test Case 1: AES-128 zero key, empty P/A, 96-bit zero IV -> tag only. */
1740 static const u8 K1[16], *P1 = NULL, *A1 = NULL, IV1[12], *C1 = NULL;
1741 static const u8 T1[] = {
1742 0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, 0x30, 0x61,
1743 0x36, 0x7f, 0x1d, 0x57, 0xa4, 0xe7, 0x45, 0x5a
/* Test Case 2: single zero block (K2/A2/IV2 presumably alias case 1 --
 * their definitions are elided). */
1750 static const u8 P2[16];
1751 static const u8 C2[] = {
1752 0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92,
1753 0xf3, 0x28, 0xc2, 0xb9, 0x71, 0xb2, 0xfe, 0x78
1756 static const u8 T2[] = {
1757 0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec, 0x13, 0xbd,
1758 0xf5, 0x3a, 0x67, 0xb2, 0x12, 0x57, 0xbd, 0xdf
/* Test Case 3: AES-128, 64-byte plaintext, no AAD. */
1763 static const u8 K3[] = {
1764 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1765 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
1768 static const u8 P3[] = {
1769 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1770 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1771 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1772 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1773 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1774 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1775 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1776 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1779 static const u8 IV3[] = {
1780 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1781 0xde, 0xca, 0xf8, 0x88
1784 static const u8 C3[] = {
1785 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1786 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1787 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1788 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1789 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1790 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1791 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1792 0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85
1795 static const u8 T3[] = {
1796 0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd, 0x64, 0xa6,
1797 0x2c, 0xf3, 0x5a, 0xbd, 0x2b, 0xa6, 0xfa, 0xb4
/* Test Case 4: 60-byte plaintext with 20-byte AAD (K4/IV4 elided,
 * presumably aliasing case 3). */
1803 static const u8 P4[] = {
1804 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1805 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1806 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1807 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1808 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1809 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1810 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1811 0xba, 0x63, 0x7b, 0x39
1814 static const u8 A4[] = {
1815 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1816 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1817 0xab, 0xad, 0xda, 0xd2
1820 static const u8 C4[] = {
1821 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1822 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1823 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1824 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1825 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1826 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1827 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1828 0x3d, 0x58, 0xe0, 0x91
1831 static const u8 T4[] = {
1832 0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21, 0xa5, 0xdb,
1833 0x94, 0xfa, 0xe9, 0x5a, 0xe7, 0x12, 0x1a, 0x47
/* Test Case 5: same payload with a short (64-bit) IV. */
1840 static const u8 IV5[] = {
1841 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad
1844 static const u8 C5[] = {
1845 0x61, 0x35, 0x3b, 0x4c, 0x28, 0x06, 0x93, 0x4a,
1846 0x77, 0x7f, 0xf5, 0x1f, 0xa2, 0x2a, 0x47, 0x55,
1847 0x69, 0x9b, 0x2a, 0x71, 0x4f, 0xcd, 0xc6, 0xf8,
1848 0x37, 0x66, 0xe5, 0xf9, 0x7b, 0x6c, 0x74, 0x23,
1849 0x73, 0x80, 0x69, 0x00, 0xe4, 0x9f, 0x24, 0xb2,
1850 0x2b, 0x09, 0x75, 0x44, 0xd4, 0x89, 0x6b, 0x42,
1851 0x49, 0x89, 0xb5, 0xe1, 0xeb, 0xac, 0x0f, 0x07,
1852 0xc2, 0x3f, 0x45, 0x98
1855 static const u8 T5[] = {
1856 0x36, 0x12, 0xd2, 0xe7, 0x9e, 0x3b, 0x07, 0x85,
1857 0x56, 0x1b, 0xe1, 0x4a, 0xac, 0xa2, 0xfc, 0xcb
/* Test Case 6: same payload with a long (60-byte) IV. */
1864 static const u8 IV6[] = {
1865 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
1866 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
1867 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
1868 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
1869 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
1870 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
1871 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
1872 0xa6, 0x37, 0xb3, 0x9b
1875 static const u8 C6[] = {
1876 0x8c, 0xe2, 0x49, 0x98, 0x62, 0x56, 0x15, 0xb6,
1877 0x03, 0xa0, 0x33, 0xac, 0xa1, 0x3f, 0xb8, 0x94,
1878 0xbe, 0x91, 0x12, 0xa5, 0xc3, 0xa2, 0x11, 0xa8,
1879 0xba, 0x26, 0x2a, 0x3c, 0xca, 0x7e, 0x2c, 0xa7,
1880 0x01, 0xe4, 0xa9, 0xa4, 0xfb, 0xa4, 0x3c, 0x90,
1881 0xcc, 0xdc, 0xb2, 0x81, 0xd4, 0x8c, 0x7c, 0x6f,
1882 0xd6, 0x28, 0x75, 0xd2, 0xac, 0xa4, 0x17, 0x03,
1883 0x4c, 0x34, 0xae, 0xe5
1886 static const u8 T6[] = {
1887 0x61, 0x9c, 0xc5, 0xae, 0xff, 0xfe, 0x0b, 0xfa,
1888 0x46, 0x2a, 0xf4, 0x3c, 0x16, 0x99, 0xd0, 0x50
/* Test Case 7: AES-192 zero key, empty P/A. */
1892 static const u8 K7[24], *P7 = NULL, *A7 = NULL, IV7[12], *C7 = NULL;
1893 static const u8 T7[] = {
1894 0xcd, 0x33, 0xb2, 0x8a, 0xc7, 0x73, 0xf7, 0x4b,
1895 0xa0, 0x0e, 0xd1, 0xf3, 0x12, 0x57, 0x24, 0x35
/* Test Case 8: AES-192, single zero block. */
1902 static const u8 P8[16];
1903 static const u8 C8[] = {
1904 0x98, 0xe7, 0x24, 0x7c, 0x07, 0xf0, 0xfe, 0x41,
1905 0x1c, 0x26, 0x7e, 0x43, 0x84, 0xb0, 0xf6, 0x00
1908 static const u8 T8[] = {
1909 0x2f, 0xf5, 0x8d, 0x80, 0x03, 0x39, 0x27, 0xab,
1910 0x8e, 0xf4, 0xd4, 0x58, 0x75, 0x14, 0xf0, 0xfb
/* Test Case 9: AES-192, 64-byte plaintext, no AAD. */
1915 static const u8 K9[] = {
1916 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1917 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
1918 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c
1921 static const u8 P9[] = {
1922 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1923 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1924 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1925 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1926 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1927 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1928 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1929 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1932 static const u8 IV9[] = {
1933 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1934 0xde, 0xca, 0xf8, 0x88
1937 static const u8 C9[] = {
1938 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1939 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1940 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1941 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1942 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1943 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1944 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1945 0xcc, 0xda, 0x27, 0x10, 0xac, 0xad, 0xe2, 0x56
1948 static const u8 T9[] = {
1949 0x99, 0x24, 0xa7, 0xc8, 0x58, 0x73, 0x36, 0xbf,
1950 0xb1, 0x18, 0x02, 0x4d, 0xb8, 0x67, 0x4a, 0x14
/* Test Case 10: AES-192, 60-byte plaintext with AAD. */
1956 static const u8 P10[] = {
1957 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1958 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1959 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1960 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1961 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1962 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1963 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1964 0xba, 0x63, 0x7b, 0x39
1967 static const u8 A10[] = {
1968 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1969 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1970 0xab, 0xad, 0xda, 0xd2
1973 static const u8 C10[] = {
1974 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1975 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1976 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1977 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1978 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1979 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1980 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1981 0xcc, 0xda, 0x27, 0x10
1984 static const u8 T10[] = {
1985 0x25, 0x19, 0x49, 0x8e, 0x80, 0xf1, 0x47, 0x8f,
1986 0x37, 0xba, 0x55, 0xbd, 0x6d, 0x27, 0x61, 0x8c
/* Test Case 11: AES-192, short (64-bit) IV. */
1993 static const u8 IV11[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
1995 static const u8 C11[] = {
1996 0x0f, 0x10, 0xf5, 0x99, 0xae, 0x14, 0xa1, 0x54,
1997 0xed, 0x24, 0xb3, 0x6e, 0x25, 0x32, 0x4d, 0xb8,
1998 0xc5, 0x66, 0x63, 0x2e, 0xf2, 0xbb, 0xb3, 0x4f,
1999 0x83, 0x47, 0x28, 0x0f, 0xc4, 0x50, 0x70, 0x57,
2000 0xfd, 0xdc, 0x29, 0xdf, 0x9a, 0x47, 0x1f, 0x75,
2001 0xc6, 0x65, 0x41, 0xd4, 0xd4, 0xda, 0xd1, 0xc9,
2002 0xe9, 0x3a, 0x19, 0xa5, 0x8e, 0x8b, 0x47, 0x3f,
2003 0xa0, 0xf0, 0x62, 0xf7
2006 static const u8 T11[] = {
2007 0x65, 0xdc, 0xc5, 0x7f, 0xcf, 0x62, 0x3a, 0x24,
2008 0x09, 0x4f, 0xcc, 0xa4, 0x0d, 0x35, 0x33, 0xf8
/* Test Case 12: AES-192, long (60-byte) IV. */
2015 static const u8 IV12[] = {
2016 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2017 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2018 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2019 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2020 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2021 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2022 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2023 0xa6, 0x37, 0xb3, 0x9b
2026 static const u8 C12[] = {
2027 0xd2, 0x7e, 0x88, 0x68, 0x1c, 0xe3, 0x24, 0x3c,
2028 0x48, 0x30, 0x16, 0x5a, 0x8f, 0xdc, 0xf9, 0xff,
2029 0x1d, 0xe9, 0xa1, 0xd8, 0xe6, 0xb4, 0x47, 0xef,
2030 0x6e, 0xf7, 0xb7, 0x98, 0x28, 0x66, 0x6e, 0x45,
2031 0x81, 0xe7, 0x90, 0x12, 0xaf, 0x34, 0xdd, 0xd9,
2032 0xe2, 0xf0, 0x37, 0x58, 0x9b, 0x29, 0x2d, 0xb3,
2033 0xe6, 0x7c, 0x03, 0x67, 0x45, 0xfa, 0x22, 0xe7,
2034 0xe9, 0xb7, 0x37, 0x3b
2037 static const u8 T12[] = {
2038 0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb,
2039 0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9
/* Test Case 13: AES-256 zero key, empty P/A. */
2043 static const u8 K13[32], *P13 = NULL, *A13 = NULL, IV13[12], *C13 = NULL;
2044 static const u8 T13[] = {
2045 0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9,
2046 0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b
/* Test Case 14: AES-256, single zero block. */
2052 static const u8 P14[16], IV14[12];
2053 static const u8 C14[] = {
2054 0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, 0x6b, 0x6e,
2055 0x07, 0x4e, 0xc5, 0xd3, 0xba, 0xf3, 0x9d, 0x18
2058 static const u8 T14[] = {
2059 0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99, 0x6b, 0xf0,
2060 0x26, 0x5b, 0x98, 0xb5, 0xd4, 0x8a, 0xb9, 0x19
/* Test Case 15: AES-256, 64-byte plaintext, no AAD. */
2065 static const u8 K15[] = {
2066 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2067 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
2068 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2069 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
2072 static const u8 P15[] = {
2073 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2074 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2075 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2076 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2077 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2078 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2079 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2080 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
2083 static const u8 IV15[] = {
2084 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
2085 0xde, 0xca, 0xf8, 0x88
2088 static const u8 C15[] = {
2089 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2090 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2091 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2092 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2093 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2094 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2095 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2096 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2099 static const u8 T15[] = {
2100 0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34, 0x71, 0xbd,
2101 0xec, 0x1a, 0x50, 0x22, 0x70, 0xe3, 0xcc, 0x6c
/* Test Case 16: AES-256, 60-byte plaintext with AAD. */
2107 static const u8 P16[] = {
2108 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2109 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2110 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2111 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2112 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2113 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2114 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2115 0xba, 0x63, 0x7b, 0x39
2118 static const u8 A16[] = {
2119 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2120 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2121 0xab, 0xad, 0xda, 0xd2
2124 static const u8 C16[] = {
2125 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2126 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2127 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2128 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2129 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2130 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2131 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2132 0xbc, 0xc9, 0xf6, 0x62
2135 static const u8 T16[] = {
2136 0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68,
2137 0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
/* Test Case 17: AES-256, short (64-bit) IV. */
2144 static const u8 IV17[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
2146 static const u8 C17[] = {
2147 0xc3, 0x76, 0x2d, 0xf1, 0xca, 0x78, 0x7d, 0x32,
2148 0xae, 0x47, 0xc1, 0x3b, 0xf1, 0x98, 0x44, 0xcb,
2149 0xaf, 0x1a, 0xe1, 0x4d, 0x0b, 0x97, 0x6a, 0xfa,
2150 0xc5, 0x2f, 0xf7, 0xd7, 0x9b, 0xba, 0x9d, 0xe0,
2151 0xfe, 0xb5, 0x82, 0xd3, 0x39, 0x34, 0xa4, 0xf0,
2152 0x95, 0x4c, 0xc2, 0x36, 0x3b, 0xc7, 0x3f, 0x78,
2153 0x62, 0xac, 0x43, 0x0e, 0x64, 0xab, 0xe4, 0x99,
2154 0xf4, 0x7c, 0x9b, 0x1f
2157 static const u8 T17[] = {
2158 0x3a, 0x33, 0x7d, 0xbf, 0x46, 0xa7, 0x92, 0xc4,
2159 0x5e, 0x45, 0x49, 0x13, 0xfe, 0x2e, 0xa8, 0xf2
/* Test Case 18: AES-256, long (60-byte) IV. */
2166 static const u8 IV18[] = {
2167 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2168 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2169 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2170 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2171 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2172 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2173 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2174 0xa6, 0x37, 0xb3, 0x9b
2177 static const u8 C18[] = {
2178 0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1,
2179 0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20,
2180 0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19,
2181 0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4,
2182 0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45,
2183 0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde,
2184 0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e,
2185 0x44, 0xae, 0x7e, 0x3f
2188 static const u8 T18[] = {
2189 0xa4, 0x4a, 0x82, 0x66, 0xee, 0x1c, 0x8e, 0xb0,
2190 0xc8, 0xb5, 0xd4, 0xcf, 0x5a, 0xe9, 0xf1, 0x9a
/* Test Case 19: AAD-only (GHASH-only) vector -- 128 bytes of AAD,
 * no plaintext/ciphertext. */
2198 static const u8 A19[] = {
2199 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2200 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2201 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2202 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2203 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2204 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2205 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2206 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55,
2207 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2208 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2209 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2210 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2211 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2212 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2213 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2214 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2217 static const u8 T19[] = {
2218 0x5f, 0xea, 0x79, 0x3a, 0x2d, 0x6f, 0x97, 0x4d,
2219 0x37, 0xe6, 0x8e, 0x0c, 0xb8, 0xff, 0x94, 0x92
/* Test Case 20: exercises carry propagation when the 32-bit counter
 * word starts at its maximum (see comment below). */
2225 /* this results in 0xff in counter LSB */
2226 static const u8 IV20[64] = { 0xff, 0xff, 0xff, 0xff };
2228 static const u8 P20[288];
2229 static const u8 C20[] = {
2230 0x56, 0xb3, 0x37, 0x3c, 0xa9, 0xef, 0x6e, 0x4a,
2231 0x2b, 0x64, 0xfe, 0x1e, 0x9a, 0x17, 0xb6, 0x14,
2232 0x25, 0xf1, 0x0d, 0x47, 0xa7, 0x5a, 0x5f, 0xce,
2233 0x13, 0xef, 0xc6, 0xbc, 0x78, 0x4a, 0xf2, 0x4f,
2234 0x41, 0x41, 0xbd, 0xd4, 0x8c, 0xf7, 0xc7, 0x70,
2235 0x88, 0x7a, 0xfd, 0x57, 0x3c, 0xca, 0x54, 0x18,
2236 0xa9, 0xae, 0xff, 0xcd, 0x7c, 0x5c, 0xed, 0xdf,
2237 0xc6, 0xa7, 0x83, 0x97, 0xb9, 0xa8, 0x5b, 0x49,
2238 0x9d, 0xa5, 0x58, 0x25, 0x72, 0x67, 0xca, 0xab,
2239 0x2a, 0xd0, 0xb2, 0x3c, 0xa4, 0x76, 0xa5, 0x3c,
2240 0xb1, 0x7f, 0xb4, 0x1c, 0x4b, 0x8b, 0x47, 0x5c,
2241 0xb4, 0xf3, 0xf7, 0x16, 0x50, 0x94, 0xc2, 0x29,
2242 0xc9, 0xe8, 0xc4, 0xdc, 0x0a, 0x2a, 0x5f, 0xf1,
2243 0x90, 0x3e, 0x50, 0x15, 0x11, 0x22, 0x13, 0x76,
2244 0xa1, 0xcd, 0xb8, 0x36, 0x4c, 0x50, 0x61, 0xa2,
2245 0x0c, 0xae, 0x74, 0xbc, 0x4a, 0xcd, 0x76, 0xce,
2246 0xb0, 0xab, 0xc9, 0xfd, 0x32, 0x17, 0xef, 0x9f,
2247 0x8c, 0x90, 0xbe, 0x40, 0x2d, 0xdf, 0x6d, 0x86,
2248 0x97, 0xf4, 0xf8, 0x80, 0xdf, 0xf1, 0x5b, 0xfb,
2249 0x7a, 0x6b, 0x28, 0x24, 0x1e, 0xc8, 0xfe, 0x18,
2250 0x3c, 0x2d, 0x59, 0xe3, 0xf9, 0xdf, 0xff, 0x65,
2251 0x3c, 0x71, 0x26, 0xf0, 0xac, 0xb9, 0xe6, 0x42,
2252 0x11, 0xf4, 0x2b, 0xae, 0x12, 0xaf, 0x46, 0x2b,
2253 0x10, 0x70, 0xbe, 0xf1, 0xab, 0x5e, 0x36, 0x06,
2254 0x87, 0x2c, 0xa1, 0x0d, 0xee, 0x15, 0xb3, 0x24,
2255 0x9b, 0x1a, 0x1b, 0x95, 0x8f, 0x23, 0x13, 0x4c,
2256 0x4b, 0xcc, 0xb7, 0xd0, 0x32, 0x00, 0xbc, 0xe4,
2257 0x20, 0xa2, 0xf8, 0xeb, 0x66, 0xdc, 0xf3, 0x64,
2258 0x4d, 0x14, 0x23, 0xc1, 0xb5, 0x69, 0x90, 0x03,
2259 0xc1, 0x3e, 0xce, 0xf4, 0xbf, 0x38, 0xa3, 0xb6,
2260 0x0e, 0xed, 0xc3, 0x40, 0x33, 0xba, 0xc1, 0x90,
2261 0x27, 0x83, 0xdc, 0x6d, 0x89, 0xe2, 0xe7, 0x74,
2262 0x18, 0x8a, 0x43, 0x9c, 0x7e, 0xbc, 0xc0, 0x67,
2263 0x2d, 0xbd, 0xa4, 0xdd, 0xcf, 0xb2, 0x79, 0x46,
2264 0x13, 0xb0, 0xbe, 0x41, 0x31, 0x5e, 0xf7, 0x78,
2265 0x70, 0x8a, 0x70, 0xee, 0x7d, 0x75, 0x16, 0x5c
2268 static const u8 T20[] = {
2269 0x8b, 0x30, 0x7f, 0x6b, 0x33, 0x28, 0x6d, 0x0a,
2270 0xb0, 0x26, 0xa9, 0xed, 0x3f, 0xe1, 0xe8, 0x5f
/*
 * TEST_CASE(n): token-pastes K##n/P##n/A##n/IV##n/C##n/T##n to run one
 * self-test vector in both directions -- encrypt and compare against
 * C##n/T##n, then decrypt C##n and compare against P##n/T##n; each
 * failure bumps 'ret' and prints a diagnostic.  Comments cannot be
 * placed inside the backslash-continued body, so the macro is left
 * untouched below.  (Its trailing "} while(0)" line is elided in this
 * extraction.)
 */
2273 # define TEST_CASE(n) do { \
2274 u8 out[sizeof(P##n)]; \
2275 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
2276 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
2277 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
2278 memset(out,0,sizeof(out)); \
2279 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
2280 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
2281 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
2282 (C##n && memcmp(out,C##n,sizeof(out)))) \
2283 ret++, printf ("encrypt test#%d failed.\n",n); \
2284 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
2285 memset(out,0,sizeof(out)); \
2286 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
2287 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
2288 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
2289 (P##n && memcmp(out,P##n,sizeof(out)))) \
2290 ret++, printf ("decrypt test#%d failed.\n",n); \
/*
 * Benchmark tail of the SELFTEST main() (the function header and the
 * TEST_CASE invocations are on elided lines).  Only compiled when
 * OPENSSL_CPUID_OBJ provides OPENSSL_rdtsc(); measures cycles/byte for
 * GCM vs plain CTR (the difference approximates GHASH cost), then times
 * GHASH alone.  In-place encryption (buf.c used as both in and out) is
 * intentional for a throughput measurement.
 */
2320 # ifdef OPENSSL_CPUID_OBJ
2322 size_t start, stop, gcm_t, ctr_t, OPENSSL_rdtsc();
2329 AES_set_encrypt_key(K1, sizeof(K1) * 8, &key);
2330 CRYPTO_gcm128_init(&ctx, &key, (block128_f) AES_encrypt);
2331 CRYPTO_gcm128_setiv(&ctx, IV1, sizeof(IV1));
/* Warm-up pass, then one timed pass over the buffer. */
2333 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2334 start = OPENSSL_rdtsc();
2335 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2336 gcm_t = OPENSSL_rdtsc() - start;
/* Same measurement for raw CTR mode, reusing the GCM context state. */
2338 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2339 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2340 (block128_f) AES_encrypt);
2341 start = OPENSSL_rdtsc();
2342 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2343 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2344 (block128_f) AES_encrypt);
2345 ctr_t = OPENSSL_rdtsc() - start;
/* Report gcm - ctr = approximate GHASH cycles/byte. */
2347 printf("%.2f-%.2f=%.2f\n",
2348 gcm_t / (double)sizeof(buf),
2349 ctr_t / (double)sizeof(buf),
2350 (gcm_t - ctr_t) / (double)sizeof(buf));
2353 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
2354 const u8 *inp, size_t len) = ctx.ghash;
/* Time GHASH by itself, averaged over 100 iterations. */
2356 GHASH((&ctx), buf.c, sizeof(buf));
2357 start = OPENSSL_rdtsc();
2358 for (i = 0; i < 100; ++i)
2359 GHASH((&ctx), buf.c, sizeof(buf));
2360 gcm_t = OPENSSL_rdtsc() - start;
2361 printf("%.2f\n", gcm_t / (double)sizeof(buf) / (double)i);