7e856b54894a36561d2256251aade30269e36583
[openssl.git] / crypto / modes / gcm128.c
1 /* ====================================================================
2  * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer. 
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  *
16  * 3. All advertising materials mentioning features or use of this
17  *    software must display the following acknowledgment:
18  *    "This product includes software developed by the OpenSSL Project
19  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20  *
21  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22  *    endorse or promote products derived from this software without
23  *    prior written permission. For written permission, please contact
24  *    openssl-core@openssl.org.
25  *
26  * 5. Products derived from this software may not be called "OpenSSL"
27  *    nor may "OpenSSL" appear in their names without prior written
28  *    permission of the OpenSSL Project.
29  *
30  * 6. Redistributions of any form whatsoever must retain the following
31  *    acknowledgment:
32  *    "This product includes software developed by the OpenSSL Project
33  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34  *
35  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
39  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46  * OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  */
49
50
51
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
54 #include <string.h>
55
56 #ifndef MODES_DEBUG
57 # ifndef NDEBUG
58 #  define NDEBUG
59 # endif
60 #endif
61 #include <assert.h>
62
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/*
 * Redefine GETU32/PUTU32 in terms of BSWAP4, because alignment is ensured
 * for the buffers these macros are applied to in this file.
 *
 * PUTU32 expands to a fully parenthesized assignment expression so that it
 * keeps its meaning wherever an expression is allowed (e.g. as an operand
 * of ?: or the comma operator), not only as a stand-alone statement.
 */
#undef  GETU32
#define GETU32(p)       BSWAP4(*(const u32 *)(p))
#undef  PUTU32
#define PUTU32(p,v)     (*(u32 *)(p) = BSWAP4(v))
#endif
70
/*
 * PACK(s) places the 16-bit constant s in the top 16 bits of a size_t.
 */
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * REDUCE1BIT(V) shifts the 128-bit value V (members .hi and .lo) right by
 * one bit and, when the bit shifted out of V.lo was set, XORs the constant
 * 0xe1 into the most significant byte of V.hi.  Two equivalent code paths
 * are provided; which one is used depends on sizeof(size_t).
 *
 * V is parenthesized at every use so that any lvalue expression may be
 * passed.  It is evaluated several times, so it must not have side
 * effects.
 */
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-((V).lo&1)); \
                (V).lo  = ((V).hi<<63)|((V).lo>>1); \
                (V).hi  = ((V).hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)((V).lo&1)); \
                (V).lo  = ((V).hi<<63)|((V).lo>>1); \
                (V).hi  = ((V).hi>>1 )^((u64)T<<32); \
        } \
} while(0)
84
/*-
 * Even though the permitted values for TABLE_BITS are 8, 4 and 1, it
 * should never be set to 8; 8 is effectively reserved for testing
 * purposes. TABLE_BITS>1 selects the lookup-table-driven
 * implementations referred to as "Shoup's" in the GCM specification.
 * In other words, OpenSSL does not cover the whole spectrum of possible
 * table-driven implementations. Why? In the non-"Shoup's" case the
 * memory access pattern is segmented in such a manner that it is
 * trivial to see that cache-timing information can reveal a fair
 * portion of the intermediate hash value. Given that the ciphertext is
 * always available to an attacker, it is possible for the attacker to
 * attempt to deduce the secret parameter H and, if successful, tamper
 * with messages [which is nothing but trivial in CTR mode]. In the
 * "Shoup's" case it is not as trivial, but there is no reason to
 * believe that it is resistant to cache-timing attacks. As for the
 * "8-bit" implementation, it consumes 16 (sixteen) times more memory:
 * 4KB per individual key + 1KB shared. On the plus side, it should be
 * twice as fast as the "4-bit" version; for gcc-generated x86[_64]
 * code the "8-bit" version was observed to run ~75% faster, closer to
 * 100% for commercial compilers... Yet the "4-bit" procedure is
 * preferred, because it is believed to provide a better
 * security-performance balance and adequate all-round performance.
 * "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example, on Windows a large enough free
 *   results in VM working set trimming, meaning that a subsequent
 *   malloc would immediately incur working set expansion);
 * - a larger table has a larger cache footprint, which can affect
 *   the performance of other code paths (not necessarily even from
 *   the same thread in a Hyper-Threading world);
 *
 * A value of 1 is not appropriate for performance reasons.
 */
119 #if     TABLE_BITS==8
120
121 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
122 {
123         int  i, j;
124         u128 V;
125
126         Htable[0].hi = 0;
127         Htable[0].lo = 0;
128         V.hi = H[0];
129         V.lo = H[1];
130
131         for (Htable[128]=V, i=64; i>0; i>>=1) {
132                 REDUCE1BIT(V);
133                 Htable[i] = V;
134         }
135
136         for (i=2; i<256; i<<=1) {
137                 u128 *Hi = Htable+i, H0 = *Hi;
138                 for (j=1; j<i; ++j) {
139                         Hi[j].hi = H0.hi^Htable[j].hi;
140                         Hi[j].lo = H0.lo^Htable[j].lo;
141                 }
142         }
143 }
144
145 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
146 {
147         u128 Z = { 0, 0};
148         const u8 *xi = (const u8 *)Xi+15;
149         size_t rem, n = *xi;
150         const union { long one; char little; } is_endian = {1};
151         static const size_t rem_8bit[256] = {
152                 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
153                 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
154                 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
155                 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
156                 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
157                 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
158                 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
159                 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
160                 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
161                 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
162                 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
163                 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
164                 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
165                 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
166                 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
167                 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
168                 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
169                 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
170                 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
171                 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
172                 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
173                 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
174                 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
175                 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
176                 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
177                 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
178                 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
179                 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
180                 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
181                 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
182                 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
183                 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
184                 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
185                 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
186                 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
187                 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
188                 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
189                 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
190                 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
191                 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
192                 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
193                 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
194                 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
195                 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
196                 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
197                 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
198                 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
199                 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
200                 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
201                 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
202                 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
203                 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
204                 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
205                 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
206                 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
207                 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
208                 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
209                 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
210                 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
211                 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
212                 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
213                 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
214                 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
215                 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
216
217         while (1) {
218                 Z.hi ^= Htable[n].hi;
219                 Z.lo ^= Htable[n].lo;
220
221                 if ((u8 *)Xi==xi)       break;
222
223                 n = *(--xi);
224
225                 rem  = (size_t)Z.lo&0xff;
226                 Z.lo = (Z.hi<<56)|(Z.lo>>8);
227                 Z.hi = (Z.hi>>8);
228                 if (sizeof(size_t)==8)
229                         Z.hi ^= rem_8bit[rem];
230                 else
231                         Z.hi ^= (u64)rem_8bit[rem]<<32;
232         }
233
234         if (is_endian.little) {
235 #ifdef BSWAP8
236                 Xi[0] = BSWAP8(Z.hi);
237                 Xi[1] = BSWAP8(Z.lo);
238 #else
239                 u8 *p = (u8 *)Xi;
240                 u32 v;
241                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
242                 v = (u32)(Z.hi);        PUTU32(p+4,v);
243                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
244                 v = (u32)(Z.lo);        PUTU32(p+12,v);
245 #endif
246         }
247         else {
248                 Xi[0] = Z.hi;
249                 Xi[1] = Z.lo;
250         }
251 }
/*
 * GCM_MUL for the "8-bit" implementation.  The second parameter is named
 * Xi, so the argument text also replaces the member name in the expansion;
 * ctx is parenthesized so that any pointer expression may be passed.
 */
#define GCM_MUL(ctx,Xi)   gcm_gmult_8bit((ctx)->Xi.u,(ctx)->Htable)
253
254 #elif   TABLE_BITS==4
255
256 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
257 {
258         u128 V;
259 #if defined(OPENSSL_SMALL_FOOTPRINT)
260         int  i;
261 #endif
262
263         Htable[0].hi = 0;
264         Htable[0].lo = 0;
265         V.hi = H[0];
266         V.lo = H[1];
267
268 #if defined(OPENSSL_SMALL_FOOTPRINT)
269         for (Htable[8]=V, i=4; i>0; i>>=1) {
270                 REDUCE1BIT(V);
271                 Htable[i] = V;
272         }
273
274         for (i=2; i<16; i<<=1) {
275                 u128 *Hi = Htable+i;
276                 int   j;
277                 for (V=*Hi, j=1; j<i; ++j) {
278                         Hi[j].hi = V.hi^Htable[j].hi;
279                         Hi[j].lo = V.lo^Htable[j].lo;
280                 }
281         }
282 #else
283         Htable[8] = V;
284         REDUCE1BIT(V);
285         Htable[4] = V;
286         REDUCE1BIT(V);
287         Htable[2] = V;
288         REDUCE1BIT(V);
289         Htable[1] = V;
290         Htable[3].hi  = V.hi^Htable[2].hi, Htable[3].lo  = V.lo^Htable[2].lo;
291         V=Htable[4];
292         Htable[5].hi  = V.hi^Htable[1].hi, Htable[5].lo  = V.lo^Htable[1].lo;
293         Htable[6].hi  = V.hi^Htable[2].hi, Htable[6].lo  = V.lo^Htable[2].lo;
294         Htable[7].hi  = V.hi^Htable[3].hi, Htable[7].lo  = V.lo^Htable[3].lo;
295         V=Htable[8];
296         Htable[9].hi  = V.hi^Htable[1].hi, Htable[9].lo  = V.lo^Htable[1].lo;
297         Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
298         Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
299         Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
300         Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
301         Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
302         Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
303 #endif
304 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
305         /*
306          * ARM assembler expects specific dword order in Htable.
307          */
308         {
309         int j;
310         const union { long one; char little; } is_endian = {1};
311
312         if (is_endian.little)
313                 for (j=0;j<16;++j) {
314                         V = Htable[j];
315                         Htable[j].hi = V.lo;
316                         Htable[j].lo = V.hi;
317                 }
318         else
319                 for (j=0;j<16;++j) {
320                         V = Htable[j];
321                         Htable[j].hi = V.lo<<32|V.lo>>32;
322                         Htable[j].lo = V.hi<<32|V.hi>>32;
323                 }
324         }
325 #endif
326 }
327
328 #ifndef GHASH_ASM
329 static const size_t rem_4bit[16] = {
330         PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
331         PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
332         PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
333         PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
334
335 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
336 {
337         u128 Z;
338         int cnt = 15;
339         size_t rem, nlo, nhi;
340         const union { long one; char little; } is_endian = {1};
341
342         nlo  = ((const u8 *)Xi)[15];
343         nhi  = nlo>>4;
344         nlo &= 0xf;
345
346         Z.hi = Htable[nlo].hi;
347         Z.lo = Htable[nlo].lo;
348
349         while (1) {
350                 rem  = (size_t)Z.lo&0xf;
351                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
352                 Z.hi = (Z.hi>>4);
353                 if (sizeof(size_t)==8)
354                         Z.hi ^= rem_4bit[rem];
355                 else
356                         Z.hi ^= (u64)rem_4bit[rem]<<32;
357
358                 Z.hi ^= Htable[nhi].hi;
359                 Z.lo ^= Htable[nhi].lo;
360
361                 if (--cnt<0)            break;
362
363                 nlo  = ((const u8 *)Xi)[cnt];
364                 nhi  = nlo>>4;
365                 nlo &= 0xf;
366
367                 rem  = (size_t)Z.lo&0xf;
368                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
369                 Z.hi = (Z.hi>>4);
370                 if (sizeof(size_t)==8)
371                         Z.hi ^= rem_4bit[rem];
372                 else
373                         Z.hi ^= (u64)rem_4bit[rem]<<32;
374
375                 Z.hi ^= Htable[nlo].hi;
376                 Z.lo ^= Htable[nlo].lo;
377         }
378
379         if (is_endian.little) {
380 #ifdef BSWAP8
381                 Xi[0] = BSWAP8(Z.hi);
382                 Xi[1] = BSWAP8(Z.lo);
383 #else
384                 u8 *p = (u8 *)Xi;
385                 u32 v;
386                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
387                 v = (u32)(Z.hi);        PUTU32(p+4,v);
388                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
389                 v = (u32)(Z.lo);        PUTU32(p+12,v);
390 #endif
391         }
392         else {
393                 Xi[0] = Z.hi;
394                 Xi[1] = Z.lo;
395         }
396 }
397
398 #if !defined(OPENSSL_SMALL_FOOTPRINT)
399 /*
400  * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
401  * details... Compiler-generated code doesn't seem to give any
402  * performance improvement, at least not on x86[_64]. It's here
403  * mostly as reference and a placeholder for possible future
404  * non-trivial optimization[s]...
405  */
406 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
407                                 const u8 *inp,size_t len)
408 {
409     u128 Z;
410     int cnt;
411     size_t rem, nlo, nhi;
412     const union { long one; char little; } is_endian = {1};
413
414 #if 1
415     do {
416         cnt  = 15;
417         nlo  = ((const u8 *)Xi)[15];
418         nlo ^= inp[15];
419         nhi  = nlo>>4;
420         nlo &= 0xf;
421
422         Z.hi = Htable[nlo].hi;
423         Z.lo = Htable[nlo].lo;
424
425         while (1) {
426                 rem  = (size_t)Z.lo&0xf;
427                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
428                 Z.hi = (Z.hi>>4);
429                 if (sizeof(size_t)==8)
430                         Z.hi ^= rem_4bit[rem];
431                 else
432                         Z.hi ^= (u64)rem_4bit[rem]<<32;
433
434                 Z.hi ^= Htable[nhi].hi;
435                 Z.lo ^= Htable[nhi].lo;
436
437                 if (--cnt<0)            break;
438
439                 nlo  = ((const u8 *)Xi)[cnt];
440                 nlo ^= inp[cnt];
441                 nhi  = nlo>>4;
442                 nlo &= 0xf;
443
444                 rem  = (size_t)Z.lo&0xf;
445                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
446                 Z.hi = (Z.hi>>4);
447                 if (sizeof(size_t)==8)
448                         Z.hi ^= rem_4bit[rem];
449                 else
450                         Z.hi ^= (u64)rem_4bit[rem]<<32;
451
452                 Z.hi ^= Htable[nlo].hi;
453                 Z.lo ^= Htable[nlo].lo;
454         }
455 #else
456     /*
457      * Extra 256+16 bytes per-key plus 512 bytes shared tables
458      * [should] give ~50% improvement... One could have PACK()-ed
459      * the rem_8bit even here, but the priority is to minimize
460      * cache footprint...
461      */ 
462     u128 Hshr4[16];     /* Htable shifted right by 4 bits */
463     u8   Hshl4[16];     /* Htable shifted left  by 4 bits */
464     static const unsigned short rem_8bit[256] = {
465         0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
466         0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
467         0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
468         0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
469         0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
470         0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
471         0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
472         0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
473         0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
474         0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
475         0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
476         0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
477         0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
478         0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
479         0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
480         0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
481         0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
482         0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
483         0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
484         0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
485         0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
486         0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
487         0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
488         0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
489         0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
490         0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
491         0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
492         0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
493         0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
494         0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
495         0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
496         0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
497     /*
498      * This pre-processing phase slows down procedure by approximately
499      * same time as it makes each loop spin faster. In other words
500      * single block performance is approximately same as straightforward
501      * "4-bit" implementation, and then it goes only faster...
502      */
503     for (cnt=0; cnt<16; ++cnt) {
504         Z.hi = Htable[cnt].hi;
505         Z.lo = Htable[cnt].lo;
506         Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
507         Hshr4[cnt].hi = (Z.hi>>4);
508         Hshl4[cnt]    = (u8)(Z.lo<<4);
509     }
510
511     do {
512         for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
513                 nlo  = ((const u8 *)Xi)[cnt];
514                 nlo ^= inp[cnt];
515                 nhi  = nlo>>4;
516                 nlo &= 0xf;
517
518                 Z.hi ^= Htable[nlo].hi;
519                 Z.lo ^= Htable[nlo].lo;
520
521                 rem = (size_t)Z.lo&0xff;
522
523                 Z.lo = (Z.hi<<56)|(Z.lo>>8);
524                 Z.hi = (Z.hi>>8);
525
526                 Z.hi ^= Hshr4[nhi].hi;
527                 Z.lo ^= Hshr4[nhi].lo;
528                 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
529         }
530
531         nlo  = ((const u8 *)Xi)[0];
532         nlo ^= inp[0];
533         nhi  = nlo>>4;
534         nlo &= 0xf;
535
536         Z.hi ^= Htable[nlo].hi;
537         Z.lo ^= Htable[nlo].lo;
538
539         rem = (size_t)Z.lo&0xf;
540
541         Z.lo = (Z.hi<<60)|(Z.lo>>4);
542         Z.hi = (Z.hi>>4);
543
544         Z.hi ^= Htable[nhi].hi;
545         Z.lo ^= Htable[nhi].lo;
546         Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
547 #endif
548
549         if (is_endian.little) {
550 #ifdef BSWAP8
551                 Xi[0] = BSWAP8(Z.hi);
552                 Xi[1] = BSWAP8(Z.lo);
553 #else
554                 u8 *p = (u8 *)Xi;
555                 u32 v;
556                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
557                 v = (u32)(Z.hi);        PUTU32(p+4,v);
558                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
559                 v = (u32)(Z.lo);        PUTU32(p+12,v);
560 #endif
561         }
562         else {
563                 Xi[0] = Z.hi;
564                 Xi[1] = Z.lo;
565         }
566     } while (inp+=16, len-=16);
567 }
568 #endif
569 #else
570 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
571 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
572 #endif
573
/*
 * GCM_MUL for the "4-bit" implementation.  The second parameter is named
 * Xi, so the argument text also replaces the member name in the expansion;
 * ctx is parenthesized, as it already is in GHASH, so that any pointer
 * expression may be passed.
 */
#define GCM_MUL(ctx,Xi)   gcm_gmult_4bit((ctx)->Xi.u,(ctx)->Htable)
#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/* GHASH_CHUNK is a "stride parameter" meant to mitigate the cache
 * thrashing effect. In other words, the idea is to hash data while it's
 * still in L1 cache after the encryption pass... */
#define GHASH_CHUNK       (3*1024)
#endif
582
583 #else   /* TABLE_BITS */
584
585 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
586 {
587         u128 V,Z = { 0,0 };
588         long X;
589         int  i,j;
590         const long *xi = (const long *)Xi;
591         const union { long one; char little; } is_endian = {1};
592
593         V.hi = H[0];    /* H is in host byte order, no byte swapping */
594         V.lo = H[1];
595
596         for (j=0; j<16/sizeof(long); ++j) {
597                 if (is_endian.little) {
598                         if (sizeof(long)==8) {
599 #ifdef BSWAP8
600                                 X = (long)(BSWAP8(xi[j]));
601 #else
602                                 const u8 *p = (const u8 *)(xi+j);
603                                 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
604 #endif
605                         }
606                         else {
607                                 const u8 *p = (const u8 *)(xi+j);
608                                 X = (long)GETU32(p);
609                         }
610                 }
611                 else
612                         X = xi[j];
613
614                 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
615                         u64 M = (u64)(X>>(8*sizeof(long)-1));
616                         Z.hi ^= V.hi&M;
617                         Z.lo ^= V.lo&M;
618
619                         REDUCE1BIT(V);
620                 }
621         }
622
623         if (is_endian.little) {
624 #ifdef BSWAP8
625                 Xi[0] = BSWAP8(Z.hi);
626                 Xi[1] = BSWAP8(Z.lo);
627 #else
628                 u8 *p = (u8 *)Xi;
629                 u32 v;
630                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
631                 v = (u32)(Z.hi);        PUTU32(p+4,v);
632                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
633                 v = (u32)(Z.lo);        PUTU32(p+12,v);
634 #endif
635         }
636         else {
637                 Xi[0] = Z.hi;
638                 Xi[1] = Z.lo;
639         }
640 }
/*
 * GCM_MUL for the "1-bit" implementation.  The second parameter is named
 * Xi, so the argument text also replaces the member name in the expansion;
 * ctx is parenthesized so that any pointer expression may be passed.
 */
#define GCM_MUL(ctx,Xi)   gcm_gmult_1bit((ctx)->Xi.u,(ctx)->H.u)
642
643 #endif
644
/*
 * Platform-specific GHASH routines (declared here, defined elsewhere).
 * Each supported platform family defines its GHASH_ASM_* marker together
 * with GCM_FUNCREF_4BIT and declares the init/gmult/ghash entry points it
 * may use.
 */
#if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if    !defined(I386_ONLY) && \
        (defined(__i386)        || defined(__i386__)    || \
         defined(__x86_64)      || defined(__x86_64__)  || \
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
/* ---- x86 and x86_64 ---- */
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_ia32cap_P[2];

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);

#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
/* 32-bit x86: the *_avx names are aliases for the *_clmul routines. */
#   define GHASH_ASM_X86
#   define gcm_init_avx         gcm_init_clmul
#   define gcm_gmult_avx        gcm_gmult_clmul
#   define gcm_ghash_avx        gcm_ghash_clmul

void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
#  else
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
#  endif
# elif  defined(__arm__) || defined(__arm) || defined(__aarch64__)
/* ---- ARM / AArch64 ---- */
#  include "arm_arch.h"
#  if   __ARM_MAX_ARCH__>=7
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
#   if  defined(__arm__) || defined(__arm)
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
#  endif
# elif  defined(__sparc__) || defined(__sparc)
/* ---- SPARC ---- */
#  include "sparc_arch.h"
#  define GHASH_ASM_SPARC
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_sparcv9cap_P[];
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
# elif  defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
/* ---- PowerPC ---- */
#  include "ppc_arch.h"
#  define GHASH_ASM_PPC
#  define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
# endif
#endif
709
/*
 * When GCM_FUNCREF_4BIT is defined, GCM_MUL (and GHASH, if it was defined)
 * are redirected through the function pointers gcm_gmult_p / gcm_ghash_p.
 * Those names are not declared here; they must be in scope wherever the
 * macros are expanded.  The second GCM_MUL parameter is named Xi, so the
 * argument text also replaces the member name in the expansion.  ctx is
 * parenthesized so that any pointer expression may be passed.
 */
#ifdef GCM_FUNCREF_4BIT
# undef  GCM_MUL
# define GCM_MUL(ctx,Xi)        (*gcm_gmult_p)((ctx)->Xi.u,(ctx)->Htable)
# ifdef GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)((ctx)->Xi.u,(ctx)->Htable,in,len)
# endif
#endif
718
/*
 * Initialise a GCM context for the given block function and key.
 *
 * The whole context is zeroed, the block callback and key pointer are
 * recorded, and H is produced by running the block function in place over the
 * (all-zero) ctx->H buffer.  H is then converted to host byte order and used
 * to fill ctx->Htable.  Where more than one GHASH implementation is compiled
 * in, one is picked at run time from the CPU capability words and its
 * routines are stored in ctx->gmult / ctx->ghash.
 *
 * The key pointer is stored as-is (not copied) and is handed back to the
 * block function by the other routines in this file.
 */
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
{
        /* Run-time endianness probe: .little is 1 on little-endian hosts. */
        const union { long one; char little; } is_endian = {1};

        memset(ctx,0,sizeof(*ctx));
        ctx->block = block;
        ctx->key   = key;

        /* ctx->H is all zeros after the memset, so this computes block(0). */
        (*block)(ctx->H.c,ctx->H.c,key);

        if (is_endian.little) {
                /* H is stored in host byte order */
#ifdef BSWAP8
                ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
                ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
                /* No 64-bit swap available: rebuild both halves from bytes. */
                u8 *p = ctx->H.c;
                u64 hi,lo;
                hi = (u64)GETU32(p)  <<32|GETU32(p+4);
                lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
                ctx->H.u[0] = hi;
                ctx->H.u[1] = lo;
#endif
        }

        /* Build the multiplication table and select the GHASH routines. */
#if     TABLE_BITS==8
        gcm_init_8bit(ctx->Htable,ctx->H.u);
#elif   TABLE_BITS==4
# if    defined(GHASH_ASM_X86_OR_64)
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
        if (OPENSSL_ia32cap_P[0]&(1<<24) &&     /* check FXSR bit */
            OPENSSL_ia32cap_P[1]&(1<<1) ) {     /* check PCLMULQDQ bit */
                if (((OPENSSL_ia32cap_P[1]>>22)&0x41)==0x41) {  /* AVX+MOVBE */
                        gcm_init_avx(ctx->Htable,ctx->H.u);
                        ctx->gmult = gcm_gmult_avx;
                        ctx->ghash = gcm_ghash_avx;
                } else {
                        gcm_init_clmul(ctx->Htable,ctx->H.u);
                        ctx->gmult = gcm_gmult_clmul;
                        ctx->ghash = gcm_ghash_clmul;
                }
                /* Table and routines are set; skip the 4-bit setup below. */
                return;
        }
#  endif
        gcm_init_4bit(ctx->Htable,ctx->H.u);
#  if   defined(GHASH_ASM_X86)                  /* x86 only */
        /* The "if" header is chosen at compile time; the body is shared. */
#   if  defined(OPENSSL_IA32_SSE2)
        if (OPENSSL_ia32cap_P[0]&(1<<25)) {     /* check SSE bit */
#   else
        if (OPENSSL_ia32cap_P[0]&(1<<23)) {     /* check MMX bit */
#   endif
                ctx->gmult = gcm_gmult_4bit_mmx;
                ctx->ghash = gcm_ghash_4bit_mmx;
        } else {
                ctx->gmult = gcm_gmult_4bit_x86;
                ctx->ghash = gcm_ghash_4bit_x86;
        }
#  else
        ctx->gmult = gcm_gmult_4bit;
        ctx->ghash = gcm_ghash_4bit;
#  endif
# elif  defined(GHASH_ASM_ARM)
        /*
         * The optional PMULL and NEON tests chain through their dangling
         * "else" into the generic 4-bit block at the end.
         */
#  ifdef PMULL_CAPABLE
        if (PMULL_CAPABLE) {
                gcm_init_v8(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_v8;
                ctx->ghash = gcm_ghash_v8;
        } else
#  endif
#  ifdef NEON_CAPABLE
        if (NEON_CAPABLE) {
                gcm_init_neon(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_neon;
                ctx->ghash = gcm_ghash_neon;
        } else
#  endif
        {
                gcm_init_4bit(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_4bit;
                ctx->ghash = gcm_ghash_4bit;
        }
# elif  defined(GHASH_ASM_SPARC)
        if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
                gcm_init_vis3(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_vis3;
                ctx->ghash = gcm_ghash_vis3;
        } else {
                gcm_init_4bit(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_4bit;
                ctx->ghash = gcm_ghash_4bit;
        }
# elif  defined(GHASH_ASM_PPC)
        if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
                gcm_init_p8(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_p8;
                ctx->ghash = gcm_ghash_p8;
        } else {
                gcm_init_4bit(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_4bit;
                ctx->ghash = gcm_ghash_4bit;
        }
# else
        /* Only the table is built here; ctx->gmult/ctx->ghash stay zeroed. */
        gcm_init_4bit(ctx->Htable,ctx->H.u);
# endif
#endif
}
825
826 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
827 {
828         const union { long one; char little; } is_endian = {1};
829         unsigned int ctr;
830 #ifdef GCM_FUNCREF_4BIT
831         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
832 #endif
833
834         ctx->Yi.u[0]  = 0;
835         ctx->Yi.u[1]  = 0;
836         ctx->Xi.u[0]  = 0;
837         ctx->Xi.u[1]  = 0;
838         ctx->len.u[0] = 0;      /* AAD length */
839         ctx->len.u[1] = 0;      /* message length */
840         ctx->ares = 0;
841         ctx->mres = 0;
842
843         if (len==12) {
844                 memcpy(ctx->Yi.c,iv,12);
845                 ctx->Yi.c[15]=1;
846                 ctr=1;
847         }
848         else {
849                 size_t i;
850                 u64 len0 = len;
851
852                 while (len>=16) {
853                         for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
854                         GCM_MUL(ctx,Yi);
855                         iv += 16;
856                         len -= 16;
857                 }
858                 if (len) {
859                         for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
860                         GCM_MUL(ctx,Yi);
861                 }
862                 len0 <<= 3;
863                 if (is_endian.little) {
864 #ifdef BSWAP8
865                         ctx->Yi.u[1]  ^= BSWAP8(len0);
866 #else
867                         ctx->Yi.c[8]  ^= (u8)(len0>>56);
868                         ctx->Yi.c[9]  ^= (u8)(len0>>48);
869                         ctx->Yi.c[10] ^= (u8)(len0>>40);
870                         ctx->Yi.c[11] ^= (u8)(len0>>32);
871                         ctx->Yi.c[12] ^= (u8)(len0>>24);
872                         ctx->Yi.c[13] ^= (u8)(len0>>16);
873                         ctx->Yi.c[14] ^= (u8)(len0>>8);
874                         ctx->Yi.c[15] ^= (u8)(len0);
875 #endif
876                 }
877                 else
878                         ctx->Yi.u[1]  ^= len0;
879
880                 GCM_MUL(ctx,Yi);
881
882                 if (is_endian.little)
883 #ifdef BSWAP4
884                         ctr = BSWAP4(ctx->Yi.d[3]);
885 #else
886                         ctr = GETU32(ctx->Yi.c+12);
887 #endif
888                 else
889                         ctr = ctx->Yi.d[3];
890         }
891
892         (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
893         ++ctr;
894         if (is_endian.little)
895 #ifdef BSWAP4
896                 ctx->Yi.d[3] = BSWAP4(ctr);
897 #else
898                 PUTU32(ctx->Yi.c+12,ctr);
899 #endif
900         else
901                 ctx->Yi.d[3] = ctr;
902 }
903
904 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
905 {
906         size_t i;
907         unsigned int n;
908         u64 alen = ctx->len.u[0];
909 #ifdef GCM_FUNCREF_4BIT
910         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
911 # ifdef GHASH
912         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
913                                 const u8 *inp,size_t len)       = ctx->ghash;
914 # endif
915 #endif
916
917         if (ctx->len.u[1]) return -2;
918
919         alen += len;
920         if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
921                 return -1;
922         ctx->len.u[0] = alen;
923
924         n = ctx->ares;
925         if (n) {
926                 while (n && len) {
927                         ctx->Xi.c[n] ^= *(aad++);
928                         --len;
929                         n = (n+1)%16;
930                 }
931                 if (n==0) GCM_MUL(ctx,Xi);
932                 else {
933                         ctx->ares = n;
934                         return 0;
935                 }
936         }
937
938 #ifdef GHASH
939         if ((i = (len&(size_t)-16))) {
940                 GHASH(ctx,aad,i);
941                 aad += i;
942                 len -= i;
943         }
944 #else
945         while (len>=16) {
946                 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
947                 GCM_MUL(ctx,Xi);
948                 aad += 16;
949                 len -= 16;
950         }
951 #endif
952         if (len) {
953                 n = (unsigned int)len;
954                 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
955         }
956
957         ctx->ares = n;
958         return 0;
959 }
960
961 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
962                 const unsigned char *in, unsigned char *out,
963                 size_t len)
964 {
965         const union { long one; char little; } is_endian = {1};
966         unsigned int n, ctr;
967         size_t i;
968         u64        mlen  = ctx->len.u[1];
969         block128_f block = ctx->block;
970         void      *key   = ctx->key;
971 #ifdef GCM_FUNCREF_4BIT
972         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
973 # ifdef GHASH
974         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
975                                 const u8 *inp,size_t len)       = ctx->ghash;
976 # endif
977 #endif
978
979 #if 0
980         n = (unsigned int)mlen%16; /* alternative to ctx->mres */
981 #endif
982         mlen += len;
983         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
984                 return -1;
985         ctx->len.u[1] = mlen;
986
987         if (ctx->ares) {
988                 /* First call to encrypt finalizes GHASH(AAD) */
989                 GCM_MUL(ctx,Xi);
990                 ctx->ares = 0;
991         }
992
993         if (is_endian.little)
994 #ifdef BSWAP4
995                 ctr = BSWAP4(ctx->Yi.d[3]);
996 #else
997                 ctr = GETU32(ctx->Yi.c+12);
998 #endif
999         else
1000                 ctr = ctx->Yi.d[3];
1001
1002         n = ctx->mres;
1003 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1004         if (16%sizeof(size_t) == 0) {   /* always true actually */
1005             do {
1006                 if (n) {
1007                         while (n && len) {
1008                                 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1009                                 --len;
1010                                 n = (n+1)%16;
1011                         }
1012                         if (n==0) GCM_MUL(ctx,Xi);
1013                         else {
1014                                 ctx->mres = n;
1015                                 return 0;
1016                         }
1017                 }
1018 #if defined(STRICT_ALIGNMENT)
1019                 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1020                         break;
1021 #endif
1022 #if defined(GHASH) && defined(GHASH_CHUNK)
1023                 while (len>=GHASH_CHUNK) {
1024                     size_t j=GHASH_CHUNK;
1025
1026                     while (j) {
1027                         size_t *out_t=(size_t *)out;
1028                         const size_t *in_t=(const size_t *)in;
1029
1030                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1031                         ++ctr;
1032                         if (is_endian.little)
1033 #ifdef BSWAP4
1034                                 ctx->Yi.d[3] = BSWAP4(ctr);
1035 #else
1036                                 PUTU32(ctx->Yi.c+12,ctr);
1037 #endif
1038                         else
1039                                 ctx->Yi.d[3] = ctr;
1040                         for (i=0; i<16/sizeof(size_t); ++i)
1041                                 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1042                         out += 16;
1043                         in  += 16;
1044                         j   -= 16;
1045                     }
1046                     GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
1047                     len -= GHASH_CHUNK;
1048                 }
1049                 if ((i = (len&(size_t)-16))) {
1050                     size_t j=i;
1051
1052                     while (len>=16) {
1053                         size_t *out_t=(size_t *)out;
1054                         const size_t *in_t=(const size_t *)in;
1055
1056                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1057                         ++ctr;
1058                         if (is_endian.little)
1059 #ifdef BSWAP4
1060                                 ctx->Yi.d[3] = BSWAP4(ctr);
1061 #else
1062                                 PUTU32(ctx->Yi.c+12,ctr);
1063 #endif
1064                         else
1065                                 ctx->Yi.d[3] = ctr;
1066                         for (i=0; i<16/sizeof(size_t); ++i)
1067                                 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1068                         out += 16;
1069                         in  += 16;
1070                         len -= 16;
1071                     }
1072                     GHASH(ctx,out-j,j);
1073                 }
1074 #else
1075                 while (len>=16) {
1076                         size_t *out_t=(size_t *)out;
1077                         const size_t *in_t=(const size_t *)in;
1078
1079                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1080                         ++ctr;
1081                         if (is_endian.little)
1082 #ifdef BSWAP4
1083                                 ctx->Yi.d[3] = BSWAP4(ctr);
1084 #else
1085                                 PUTU32(ctx->Yi.c+12,ctr);
1086 #endif
1087                         else
1088                                 ctx->Yi.d[3] = ctr;
1089                         for (i=0; i<16/sizeof(size_t); ++i)
1090                                 ctx->Xi.t[i] ^=
1091                                 out_t[i] = in_t[i]^ctx->EKi.t[i];
1092                         GCM_MUL(ctx,Xi);
1093                         out += 16;
1094                         in  += 16;
1095                         len -= 16;
1096                 }
1097 #endif
1098                 if (len) {
1099                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1100                         ++ctr;
1101                         if (is_endian.little)
1102 #ifdef BSWAP4
1103                                 ctx->Yi.d[3] = BSWAP4(ctr);
1104 #else
1105                                 PUTU32(ctx->Yi.c+12,ctr);
1106 #endif
1107                         else
1108                                 ctx->Yi.d[3] = ctr;
1109                         while (len--) {
1110                                 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1111                                 ++n;
1112                         }
1113                 }
1114
1115                 ctx->mres = n;
1116                 return 0;
1117             } while(0);
1118         }
1119 #endif
1120         for (i=0;i<len;++i) {
1121                 if (n==0) {
1122                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1123                         ++ctr;
1124                         if (is_endian.little)
1125 #ifdef BSWAP4
1126                                 ctx->Yi.d[3] = BSWAP4(ctr);
1127 #else
1128                                 PUTU32(ctx->Yi.c+12,ctr);
1129 #endif
1130                         else
1131                                 ctx->Yi.d[3] = ctr;
1132                 }
1133                 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
1134                 n = (n+1)%16;
1135                 if (n==0)
1136                         GCM_MUL(ctx,Xi);
1137         }
1138
1139         ctx->mres = n;
1140         return 0;
1141 }
1142
1143 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1144                 const unsigned char *in, unsigned char *out,
1145                 size_t len)
1146 {
1147         const union { long one; char little; } is_endian = {1};
1148         unsigned int n, ctr;
1149         size_t i;
1150         u64        mlen  = ctx->len.u[1];
1151         block128_f block = ctx->block;
1152         void      *key   = ctx->key;
1153 #ifdef GCM_FUNCREF_4BIT
1154         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
1155 # ifdef GHASH
1156         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1157                                 const u8 *inp,size_t len)       = ctx->ghash;
1158 # endif
1159 #endif
1160
1161         mlen += len;
1162         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1163                 return -1;
1164         ctx->len.u[1] = mlen;
1165
1166         if (ctx->ares) {
1167                 /* First call to decrypt finalizes GHASH(AAD) */
1168                 GCM_MUL(ctx,Xi);
1169                 ctx->ares = 0;
1170         }
1171
1172         if (is_endian.little)
1173 #ifdef BSWAP4
1174                 ctr = BSWAP4(ctx->Yi.d[3]);
1175 #else
1176                 ctr = GETU32(ctx->Yi.c+12);
1177 #endif
1178         else
1179                 ctr = ctx->Yi.d[3];
1180
1181         n = ctx->mres;
1182 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1183         if (16%sizeof(size_t) == 0) {   /* always true actually */
1184             do {
1185                 if (n) {
1186                         while (n && len) {
1187                                 u8 c = *(in++);
1188                                 *(out++) = c^ctx->EKi.c[n];
1189                                 ctx->Xi.c[n] ^= c;
1190                                 --len;
1191                                 n = (n+1)%16;
1192                         }
1193                         if (n==0) GCM_MUL (ctx,Xi);
1194                         else {
1195                                 ctx->mres = n;
1196                                 return 0;
1197                         }
1198                 }
1199 #if defined(STRICT_ALIGNMENT)
1200                 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1201                         break;
1202 #endif
1203 #if defined(GHASH) && defined(GHASH_CHUNK)
1204                 while (len>=GHASH_CHUNK) {
1205                     size_t j=GHASH_CHUNK;
1206
1207                     GHASH(ctx,in,GHASH_CHUNK);
1208                     while (j) {
1209                         size_t *out_t=(size_t *)out;
1210                         const size_t *in_t=(const size_t *)in;
1211
1212                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1213                         ++ctr;
1214                         if (is_endian.little)
1215 #ifdef BSWAP4
1216                                 ctx->Yi.d[3] = BSWAP4(ctr);
1217 #else
1218                                 PUTU32(ctx->Yi.c+12,ctr);
1219 #endif
1220                         else
1221                                 ctx->Yi.d[3] = ctr;
1222                         for (i=0; i<16/sizeof(size_t); ++i)
1223                                 out_t[i] = in_t[i]^ctx->EKi.t[i];
1224                         out += 16;
1225                         in  += 16;
1226                         j   -= 16;
1227                     }
1228                     len -= GHASH_CHUNK;
1229                 }
1230                 if ((i = (len&(size_t)-16))) {
1231                     GHASH(ctx,in,i);
1232                     while (len>=16) {
1233                         size_t *out_t=(size_t *)out;
1234                         const size_t *in_t=(const size_t *)in;
1235
1236                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1237                         ++ctr;
1238                         if (is_endian.little)
1239 #ifdef BSWAP4
1240                                 ctx->Yi.d[3] = BSWAP4(ctr);
1241 #else
1242                                 PUTU32(ctx->Yi.c+12,ctr);
1243 #endif
1244                         else
1245                                 ctx->Yi.d[3] = ctr;
1246                         for (i=0; i<16/sizeof(size_t); ++i)
1247                                 out_t[i] = in_t[i]^ctx->EKi.t[i];
1248                         out += 16;
1249                         in  += 16;
1250                         len -= 16;
1251                     }
1252                 }
1253 #else
1254                 while (len>=16) {
1255                         size_t *out_t=(size_t *)out;
1256                         const size_t *in_t=(const size_t *)in;
1257
1258                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1259                         ++ctr;
1260                         if (is_endian.little)
1261 #ifdef BSWAP4
1262                                 ctx->Yi.d[3] = BSWAP4(ctr);
1263 #else
1264                                 PUTU32(ctx->Yi.c+12,ctr);
1265 #endif
1266                         else
1267                                 ctx->Yi.d[3] = ctr;
1268                         for (i=0; i<16/sizeof(size_t); ++i) {
1269                                 size_t c = in[i];
1270                                 out[i] = c^ctx->EKi.t[i];
1271                                 ctx->Xi.t[i] ^= c;
1272                         }
1273                         GCM_MUL(ctx,Xi);
1274                         out += 16;
1275                         in  += 16;
1276                         len -= 16;
1277                 }
1278 #endif
1279                 if (len) {
1280                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1281                         ++ctr;
1282                         if (is_endian.little)
1283 #ifdef BSWAP4
1284                                 ctx->Yi.d[3] = BSWAP4(ctr);
1285 #else
1286                                 PUTU32(ctx->Yi.c+12,ctr);
1287 #endif
1288                         else
1289                                 ctx->Yi.d[3] = ctr;
1290                         while (len--) {
1291                                 u8 c = in[n];
1292                                 ctx->Xi.c[n] ^= c;
1293                                 out[n] = c^ctx->EKi.c[n];
1294                                 ++n;
1295                         }
1296                 }
1297
1298                 ctx->mres = n;
1299                 return 0;
1300             } while(0);
1301         }
1302 #endif
1303         for (i=0;i<len;++i) {
1304                 u8 c;
1305                 if (n==0) {
1306                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1307                         ++ctr;
1308                         if (is_endian.little)
1309 #ifdef BSWAP4
1310                                 ctx->Yi.d[3] = BSWAP4(ctr);
1311 #else
1312                                 PUTU32(ctx->Yi.c+12,ctr);
1313 #endif
1314                         else
1315                                 ctx->Yi.d[3] = ctr;
1316                 }
1317                 c = in[i];
1318                 out[i] = c^ctx->EKi.c[n];
1319                 ctx->Xi.c[n] ^= c;
1320                 n = (n+1)%16;
1321                 if (n==0)
1322                         GCM_MUL(ctx,Xi);
1323         }
1324
1325         ctx->mres = n;
1326         return 0;
1327 }
1328
1329 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1330                 const unsigned char *in, unsigned char *out,
1331                 size_t len, ctr128_f stream)
1332 {
1333         const union { long one; char little; } is_endian = {1};
1334         unsigned int n, ctr;
1335         size_t i;
1336         u64   mlen = ctx->len.u[1];
1337         void *key  = ctx->key;
1338 #ifdef GCM_FUNCREF_4BIT
1339         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
1340 # ifdef GHASH
1341         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1342                                 const u8 *inp,size_t len)       = ctx->ghash;
1343 # endif
1344 #endif
1345
1346         mlen += len;
1347         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1348                 return -1;
1349         ctx->len.u[1] = mlen;
1350
1351         if (ctx->ares) {
1352                 /* First call to encrypt finalizes GHASH(AAD) */
1353                 GCM_MUL(ctx,Xi);
1354                 ctx->ares = 0;
1355         }
1356
1357         if (is_endian.little)
1358 #ifdef BSWAP4
1359                 ctr = BSWAP4(ctx->Yi.d[3]);
1360 #else
1361                 ctr = GETU32(ctx->Yi.c+12);
1362 #endif
1363         else
1364                 ctr = ctx->Yi.d[3];
1365
1366         n = ctx->mres;
1367         if (n) {
1368                 while (n && len) {
1369                         ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1370                         --len;
1371                         n = (n+1)%16;
1372                 }
1373                 if (n==0) GCM_MUL(ctx,Xi);
1374                 else {
1375                         ctx->mres = n;
1376                         return 0;
1377                 }
1378         }
1379 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1380         while (len>=GHASH_CHUNK) {
1381                 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1382                 ctr += GHASH_CHUNK/16;
1383                 if (is_endian.little)
1384 #ifdef BSWAP4
1385                         ctx->Yi.d[3] = BSWAP4(ctr);
1386 #else
1387                         PUTU32(ctx->Yi.c+12,ctr);
1388 #endif
1389                 else
1390                         ctx->Yi.d[3] = ctr;
1391                 GHASH(ctx,out,GHASH_CHUNK);
1392                 out += GHASH_CHUNK;
1393                 in  += GHASH_CHUNK;
1394                 len -= GHASH_CHUNK;
1395         }
1396 #endif
1397         if ((i = (len&(size_t)-16))) {
1398                 size_t j=i/16;
1399
1400                 (*stream)(in,out,j,key,ctx->Yi.c);
1401                 ctr += (unsigned int)j;
1402                 if (is_endian.little)
1403 #ifdef BSWAP4
1404                         ctx->Yi.d[3] = BSWAP4(ctr);
1405 #else
1406                         PUTU32(ctx->Yi.c+12,ctr);
1407 #endif
1408                 else
1409                         ctx->Yi.d[3] = ctr;
1410                 in  += i;
1411                 len -= i;
1412 #if defined(GHASH)
1413                 GHASH(ctx,out,i);
1414                 out += i;
1415 #else
1416                 while (j--) {
1417                         for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1418                         GCM_MUL(ctx,Xi);
1419                         out += 16;
1420                 }
1421 #endif
1422         }
1423         if (len) {
1424                 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1425                 ++ctr;
1426                 if (is_endian.little)
1427 #ifdef BSWAP4
1428                         ctx->Yi.d[3] = BSWAP4(ctr);
1429 #else
1430                         PUTU32(ctx->Yi.c+12,ctr);
1431 #endif
1432                 else
1433                         ctx->Yi.d[3] = ctr;
1434                 while (len--) {
1435                         ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1436                         ++n;
1437                 }
1438         }
1439
1440         ctx->mres = n;
1441         return 0;
1442 }
1443
1444 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1445                 const unsigned char *in, unsigned char *out,
1446                 size_t len,ctr128_f stream)
1447 {
1448         const union { long one; char little; } is_endian = {1};
1449         unsigned int n, ctr;
1450         size_t i;
1451         u64   mlen = ctx->len.u[1];
1452         void *key  = ctx->key;
1453 #ifdef GCM_FUNCREF_4BIT
1454         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
1455 # ifdef GHASH
1456         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1457                                 const u8 *inp,size_t len)       = ctx->ghash;
1458 # endif
1459 #endif
1460
1461         mlen += len;
1462         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1463                 return -1;
1464         ctx->len.u[1] = mlen;
1465
1466         if (ctx->ares) {
1467                 /* First call to decrypt finalizes GHASH(AAD) */
1468                 GCM_MUL(ctx,Xi);
1469                 ctx->ares = 0;
1470         }
1471
1472         if (is_endian.little)
1473 #ifdef BSWAP4
1474                 ctr = BSWAP4(ctx->Yi.d[3]);
1475 #else
1476                 ctr = GETU32(ctx->Yi.c+12);
1477 #endif
1478         else
1479                 ctr = ctx->Yi.d[3];
1480
1481         n = ctx->mres;
1482         if (n) {
1483                 while (n && len) {
1484                         u8 c = *(in++);
1485                         *(out++) = c^ctx->EKi.c[n];
1486                         ctx->Xi.c[n] ^= c;
1487                         --len;
1488                         n = (n+1)%16;
1489                 }
1490                 if (n==0) GCM_MUL (ctx,Xi);
1491                 else {
1492                         ctx->mres = n;
1493                         return 0;
1494                 }
1495         }
1496 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1497         while (len>=GHASH_CHUNK) {
1498                 GHASH(ctx,in,GHASH_CHUNK);
1499                 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1500                 ctr += GHASH_CHUNK/16;
1501                 if (is_endian.little)
1502 #ifdef BSWAP4
1503                         ctx->Yi.d[3] = BSWAP4(ctr);
1504 #else
1505                         PUTU32(ctx->Yi.c+12,ctr);
1506 #endif
1507                 else
1508                         ctx->Yi.d[3] = ctr;
1509                 out += GHASH_CHUNK;
1510                 in  += GHASH_CHUNK;
1511                 len -= GHASH_CHUNK;
1512         }
1513 #endif
1514         if ((i = (len&(size_t)-16))) {
1515                 size_t j=i/16;
1516
1517 #if defined(GHASH)
1518                 GHASH(ctx,in,i);
1519 #else
1520                 while (j--) {
1521                         size_t k;
1522                         for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1523                         GCM_MUL(ctx,Xi);
1524                         in += 16;
1525                 }
1526                 j   = i/16;
1527                 in -= i;
1528 #endif
1529                 (*stream)(in,out,j,key,ctx->Yi.c);
1530                 ctr += (unsigned int)j;
1531                 if (is_endian.little)
1532 #ifdef BSWAP4
1533                         ctx->Yi.d[3] = BSWAP4(ctr);
1534 #else
1535                         PUTU32(ctx->Yi.c+12,ctr);
1536 #endif
1537                 else
1538                         ctx->Yi.d[3] = ctr;
1539                 out += i;
1540                 in  += i;
1541                 len -= i;
1542         }
1543         if (len) {
1544                 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1545                 ++ctr;
1546                 if (is_endian.little)
1547 #ifdef BSWAP4
1548                         ctx->Yi.d[3] = BSWAP4(ctr);
1549 #else
1550                         PUTU32(ctx->Yi.c+12,ctr);
1551 #endif
1552                 else
1553                         ctx->Yi.d[3] = ctr;
1554                 while (len--) {
1555                         u8 c = in[n];
1556                         ctx->Xi.c[n] ^= c;
1557                         out[n] = c^ctx->EKi.c[n];
1558                         ++n;
1559                 }
1560         }
1561
1562         ctx->mres = n;
1563         return 0;
1564 }
1565
1566 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1567                         size_t len)
1568 {
1569         const union { long one; char little; } is_endian = {1};
1570         u64 alen = ctx->len.u[0]<<3;
1571         u64 clen = ctx->len.u[1]<<3;
1572 #ifdef GCM_FUNCREF_4BIT
1573         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
1574 #endif
1575
1576         if (ctx->mres || ctx->ares)
1577                 GCM_MUL(ctx,Xi);
1578
1579         if (is_endian.little) {
1580 #ifdef BSWAP8
1581                 alen = BSWAP8(alen);
1582                 clen = BSWAP8(clen);
1583 #else
1584                 u8 *p = ctx->len.c;
1585
1586                 ctx->len.u[0] = alen;
1587                 ctx->len.u[1] = clen;
1588
1589                 alen = (u64)GETU32(p)  <<32|GETU32(p+4);
1590                 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1591 #endif
1592         }
1593
1594         ctx->Xi.u[0] ^= alen;
1595         ctx->Xi.u[1] ^= clen;
1596         GCM_MUL(ctx,Xi);
1597
1598         ctx->Xi.u[0] ^= ctx->EK0.u[0];
1599         ctx->Xi.u[1] ^= ctx->EK0.u[1];
1600
1601         if (tag && len<=sizeof(ctx->Xi))
1602                 return memcmp(ctx->Xi.c,tag,len);
1603         else
1604                 return -1;
1605 }
1606
1607 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1608 {
1609         CRYPTO_gcm128_finish(ctx, NULL, 0);
1610         memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
1611 }
1612
1613 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1614 {
1615         GCM128_CONTEXT *ret;
1616
1617         if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1618                 CRYPTO_gcm128_init(ret,key,block);
1619
1620         return ret;
1621 }
1622
1623 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1624 {
1625         if (ctx) {
1626                 OPENSSL_cleanse(ctx,sizeof(*ctx));
1627                 OPENSSL_free(ctx);
1628         }
1629 }
1630
1631 #if defined(SELFTEST)
1632 #include <stdio.h>
1633 #include <openssl/aes.h>
1634
1635 /* Test Case 1: 128-bit all-zero key, 12-byte all-zero IV, no AAD, no plaintext; only the tag T1 is compared */
1636 static const u8 K1[16],
1637                 *P1=NULL,
1638                 *A1=NULL,
1639                 IV1[12],
1640                 *C1=NULL;
1641 static const u8 T1[]=  {
1642                         0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,
1643                         0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a
1644                         };
1645
1646 /* Test Case 2: key, IV and (absent) AAD of case 1; plaintext is one all-zero 16-byte block */
1647 #define K2 K1
1648 #define A2 A1
1649 #define IV2 IV1
1650 static const u8 P2[16];
1651 static const u8 C2[]=  {
1652                         0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,
1653                         0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78
1654                         };
1655 static const u8 T2[]=  {
1656                         0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,
1657                         0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf
1658                         };
1659
1660 /* Test Case 3: 128-bit key K3, 12-byte IV, no AAD (A3 aliases the NULL A1), 64-byte plaintext */
1661 #define A3 A2
1662 static const u8 K3[]=  {
1663                         0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,
1664                         0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08
1665                         };
1666 static const u8 P3[]=  {
1667                         0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,
1668                         0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1669                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,
1670                         0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1671                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,
1672                         0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1673                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,
1674                         0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55
1675                         };
1676 static const u8 IV3[]= {
1677                         0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,
1678                         0xde,0xca,0xf8,0x88};
1679 static const u8 C3[]=  {
1680                         0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,
1681                         0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1682                         0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,
1683                         0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1684                         0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,
1685                         0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1686                         0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,
1687                         0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85
1688                         };
1689 static const u8 T3[]=  {
1690                         0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,
1691                         0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4
1692                         };
1693
1694 /* Test Case 4: key and IV of case 3, 20-byte AAD, 60-byte plaintext (not a multiple of 16 bytes) */
1695 #define K4 K3
1696 #define IV4 IV3
1697 static const u8 P4[]=  {
1698                         0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,
1699                         0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1700                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,
1701                         0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1702                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,
1703                         0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1704                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,
1705                         0xba,0x63,0x7b,0x39};
1706 static const u8 A4[]=  {
1707                         0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1708                         0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1709                         0xab,0xad,0xda,0xd2};
1710 static const u8 C4[]=  {
1711                         0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,
1712                         0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1713                         0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,
1714                         0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1715                         0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,
1716                         0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1717                         0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,
1718                         0x3d,0x58,0xe0,0x91
1719                         };
1720 static const u8 T4[]=  {
1721                         0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,
1722                         0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47
1723                         };
1724
1725 /* Test Case 5: key, plaintext and AAD of case 4, but with an 8-byte IV */
1726 #define K5 K4
1727 #define P5 P4
1728 #define A5 A4
1729 static const u8 IV5[]= {
1730                         0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad
1731                         };
1732 static const u8 C5[]=  {
1733                         0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,
1734                         0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1735                         0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,
1736                         0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1737                         0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,
1738                         0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1739                         0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,
1740                         0xc2,0x3f,0x45,0x98};
1741 static const u8 T5[]=  {
1742                         0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,
1743                         0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb
1744                         };
1745
1746 /* Test Case 6: key, plaintext and AAD of case 5, but with a 60-byte IV */
1747 #define K6 K5
1748 #define P6 P5
1749 #define A6 A5
1750 static const u8 IV6[]= {
1751                         0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,
1752                         0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1753                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,
1754                         0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1755                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,
1756                         0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1757                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,
1758                         0xa6,0x37,0xb3,0x9b
1759                         };
1760 static const u8 C6[]=  {
1761                         0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,
1762                         0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1763                         0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,
1764                         0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1765                         0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,
1766                         0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1767                         0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,
1768                         0x4c,0x34,0xae,0xe5
1769                         };
1770 static const u8 T6[]=  {
1771                         0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,
1772                         0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50
1773                         };
1774
1775 /* Test Case 7: 192-bit all-zero key, 12-byte all-zero IV, no AAD, no plaintext; only the tag T7 is compared */
1776 static const u8 K7[24],
1777                 *P7=NULL,
1778                 *A7=NULL,
1779                 IV7[12],
1780                 *C7=NULL;
1781 static const u8 T7[]=  {
1782                         0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,
1783                         0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35
1784                         };
1785
1786 /* Test Case 8: key, IV and (absent) AAD of case 7; plaintext is one all-zero 16-byte block */
1787 #define K8 K7
1788 #define IV8 IV7
1789 #define A8 A7
1790 static const u8 P8[16];
1791 static const u8 C8[]=  {
1792                         0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,
1793                         0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00
1794                         };
1795 static const u8 T8[]=  {
1796                         0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,
1797                         0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb
1798                         };
1799
1800 /* Test Case 9: 192-bit key K9, 12-byte IV, no AAD (A9 aliases the NULL A7), 64-byte plaintext */
1801 #define A9 A8
1802 static const u8 K9[]=  {
1803                         0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,
1804                         0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1805                         0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c
1806                         };
1807 static const u8 P9[]=  {
1808                         0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,
1809                         0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1810                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,
1811                         0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1812                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,
1813                         0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1814                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,
1815                         0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55
1816                         };
1817 static const u8 IV9[]= {
1818                         0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,
1819                         0xde,0xca,0xf8,0x88
1820                         };
1821 static const u8 C9[]=  {
1822                         0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,
1823                         0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1824                         0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,
1825                         0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1826                         0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,
1827                         0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1828                         0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,
1829                         0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56
1830                         };
1831 static const u8 T9[]=  {
1832                         0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,
1833                         0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14
1834                         };
1835
1836 /* Test Case 10: key and IV of case 9, 20-byte AAD, 60-byte plaintext (not a multiple of 16 bytes) */
1837 #define K10 K9
1838 #define IV10 IV9
1839 static const u8 P10[]= {
1840                         0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,
1841                         0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1842                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,
1843                         0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1844                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,
1845                         0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1846                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,
1847                         0xba,0x63,0x7b,0x39
1848                         };
1849 static const u8 A10[]= {
1850                         0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1851                         0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1852                         0xab,0xad,0xda,0xd2
1853                         };
1854 static const u8 C10[]= {
1855                         0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,
1856                         0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1857                         0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,
1858                         0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1859                         0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,
1860                         0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1861                         0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,
1862                         0xcc,0xda,0x27,0x10
1863                         };
1864 static const u8 T10[]= {
1865                         0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,
1866                         0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c
1867                         };
1868
1869 /* Test Case 11: key, plaintext and AAD of case 10, but with an 8-byte IV */
1870 #define K11 K10
1871 #define P11 P10
1872 #define A11 A10
1873 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad};
1874 static const u8 C11[]= {
1875                         0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,
1876                         0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1877                         0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,
1878                         0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1879                         0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,
1880                         0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1881                         0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,
1882                         0xa0,0xf0,0x62,0xf7};
1883 static const u8 T11[]= {
1884                         0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,
1885                         0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8
1886                         };
1887
1888 /* Test Case 12: key, plaintext and AAD of case 11, but with a 60-byte IV */
1889 #define K12 K11
1890 #define P12 P11
1891 #define A12 A11
1892 static const u8 IV12[]={
1893                         0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,
1894                         0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1895                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,
1896                         0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1897                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,
1898                         0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1899                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,
1900                         0xa6,0x37,0xb3,0x9b
1901                         };
1902 static const u8 C12[]= {
1903                         0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,
1904                         0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1905                         0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,
1906                         0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1907                         0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,
1908                         0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1909                         0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,
1910                         0xe9,0xb7,0x37,0x3b
1911                         };
1912 static const u8 T12[]= {
1913                         0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,
1914                         0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9
1915                         };
1916
1917 /* Test Case 13: 256-bit all-zero key, 12-byte all-zero IV, no AAD, no plaintext; only the tag T13 is compared */
1918 static const u8 K13[32],
1919                 *P13=NULL,
1920                 *A13=NULL,
1921                 IV13[12],
1922                 *C13=NULL;
1923 static const u8 T13[]= {
1924                         0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,
1925                         0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b
1926                         };
1927
1928 /* Test Case 14: key and (absent) AAD of case 13; 12-byte all-zero IV, plaintext is one all-zero 16-byte block */
1929 #define K14 K13
1930 #define A14 A13
1931 static const u8 P14[16],
1932                 IV14[12];
1933 static const u8 C14[]= {
1934                         0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,
1935                         0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18
1936                         };
1937 static const u8 T14[]= {
1938                         0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,
1939                         0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19
1940                         };
1941
1942 /* Test Case 15: 256-bit key K15, 12-byte IV, no AAD (A15 aliases the NULL A13), 64-byte plaintext */
1943 #define A15 A14
1944 static const u8 K15[]= {
1945                         0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,
1946                         0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1947                         0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,
1948                         0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08
1949                         };
1950 static const u8 P15[]= {
1951                         0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,
1952                         0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1953                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,
1954                         0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1955                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,
1956                         0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1957                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,
1958                         0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55
1959                         };
1960 static const u8 IV15[]={
1961                         0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,
1962                         0xde,0xca,0xf8,0x88
1963                         };
1964 static const u8 C15[]= {
1965                         0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,
1966                         0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1967                         0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,
1968                         0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1969                         0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,
1970                         0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1971                         0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,
1972                         0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad
1973                         };
1974 static const u8 T15[]= {
1975                         0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,
1976                         0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c
1977                         };
1978
1979 /* Test Case 16: key and IV of case 15, 20-byte AAD, 60-byte plaintext (not a multiple of 16 bytes) */
1980 #define K16 K15
1981 #define IV16 IV15
1982 static const u8 P16[]= {
1983                         0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,
1984                         0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1985                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,
1986                         0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1987                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,
1988                         0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1989                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,
1990                         0xba,0x63,0x7b,0x39
1991                         };
1992 static const u8 A16[]= {
1993                         0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1994                         0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1995                         0xab,0xad,0xda,0xd2
1996                         };
1997 static const u8 C16[]= {
1998                         0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,
1999                         0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
2000                         0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,
2001                         0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
2002                         0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,
2003                         0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
2004                         0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,
2005                         0xbc,0xc9,0xf6,0x62
2006                         };
2007 static const u8 T16[]= {
2008                         0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,
2009                         0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b
2010                         };
2011
2012 /* Test Case 17: key, plaintext and AAD of case 16, but with an 8-byte IV */
2013 #define K17 K16
2014 #define P17 P16
2015 #define A17 A16
2016 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad};
2017 static const u8 C17[]= {
2018                         0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,
2019                         0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
2020                         0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,
2021                         0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
2022                         0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,
2023                         0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
2024                         0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,
2025                         0xf4,0x7c,0x9b,0x1f
2026                         };
2027 static const u8 T17[]= {
2028                         0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,
2029                         0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2
2030                         };
2031
2032 /* Test Case 18: key, plaintext and AAD of case 17, but with a 60-byte IV */
2033 #define K18 K17
2034 #define P18 P17
2035 #define A18 A17
2036 static const u8 IV18[]={
2037                         0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,
2038                         0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
2039                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,
2040                         0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
2041                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,
2042                         0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
2043                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,
2044                         0xa6,0x37,0xb3,0x9b
2045                         };
2046 static const u8 C18[]= {
2047                         0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,
2048                         0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
2049                         0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,
2050                         0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
2051                         0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,
2052                         0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
2053                         0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,
2054                         0x44,0xae,0x7e,0x3f
2055                         };
2056 static const u8 T18[]= {
2057                         0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,
2058                         0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a
2059                         };
2060
2061 /* Test Case 19: key and IV of case 1, 128-byte AAD, no plaintext (P19/C19 alias the NULL P1/C1); only the tag T19 is compared */
2062 #define K19 K1
2063 #define P19 P1
2064 #define IV19 IV1
2065 #define C19 C1
2066 static const u8 A19[]= {
2067                         0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,
2068                         0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
2069                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,
2070                         0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
2071                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,
2072                         0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
2073                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,
2074                         0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
2075                         0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,
2076                         0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
2077                         0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,
2078                         0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
2079                         0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,
2080                         0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
2081                         0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,
2082                         0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad
2083                         };
2084 static const u8 T19[]= {
2085                         0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,
2086                         0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92
2087                         };
2088
2089 /* Test Case 20: key and (absent) AAD of case 1, 64-byte IV (four 0xff bytes followed by zeros), 288 all-zero plaintext bytes */
2090 #define K20 K1
2091 #define A20 A1
2092 static const u8 IV20[64]={0xff,0xff,0xff,0xff}; /* this results in 0xff in counter LSB */
2093 static const u8 P20[288];
2094 static const u8 C20[]= {
2095                         0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,
2096                         0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
2097                         0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,
2098                         0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
2099                         0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,
2100                         0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
2101                         0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,
2102                         0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
2103                         0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,
2104                         0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
2105                         0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,
2106                         0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
2107                         0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,
2108                         0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
2109                         0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,
2110                         0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
2111                         0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,
2112                         0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
2113                         0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,
2114                         0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
2115                         0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,
2116                         0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
2117                         0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,
2118                         0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
2119                         0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,
2120                         0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
2121                         0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,
2122                         0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
2123                         0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,
2124                         0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
2125                         0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,
2126                         0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
2127                         0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,
2128                         0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
2129                         0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,
2130                         0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c
2131                         };
2132 static const u8 T20[]= {
2133                         0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,
2134                         0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f
2135                         };
2136
/*
 * Run known-answer test #n in both directions.
 *
 * Expects K<n>, IV<n>, A<n>, P<n>, C<n> and T<n> to be defined above, and
 * locals |ctx|, |key| and |ret| to exist in the calling scope.  The AES key
 * length is taken from sizeof(K<n>).  AAD and payload are fed only when
 * A<n>/P<n>/C<n> are non-NULL; for the cases that define them as NULL
 * pointers, sizeof(P<n>) is merely the size of a pointer and |out| is
 * never compared.  Each pass that yields a wrong tag or wrong output
 * increments |ret| and prints one diagnostic line.
 */
#define TEST_CASE(n)    do {                                            \
        u8 out[sizeof(P##n)];                                           \
                                                                        \
        AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);                  \
        CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);          \
                                                                        \
        /* encrypt pass: P -> out, expect C and T */                    \
        CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));                  \
        memset(out,0,sizeof(out));                                      \
        if (A##n)                                                       \
                CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));              \
        if (P##n)                                                       \
                CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));       \
        if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||                       \
            (C##n && memcmp(out,C##n,sizeof(out)))) {                   \
                ret++;                                                  \
                printf ("encrypt test#%d failed.\n",n);                 \
        }                                                               \
                                                                        \
        /* decrypt pass: C -> out, expect P and T */                    \
        CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));                  \
        memset(out,0,sizeof(out));                                      \
        if (A##n)                                                       \
                CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));              \
        if (C##n)                                                       \
                CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));       \
        if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||                       \
            (P##n && memcmp(out,P##n,sizeof(out)))) {                   \
                ret++;                                                  \
                printf ("decrypt test#%d failed.\n",n);                 \
        }                                                               \
        } while(0)
2156
2157 int main()
2158 {
2159         GCM128_CONTEXT ctx;
2160         AES_KEY key;
2161         int ret=0;
2162
2163         TEST_CASE(1);
2164         TEST_CASE(2);
2165         TEST_CASE(3);
2166         TEST_CASE(4);
2167         TEST_CASE(5);
2168         TEST_CASE(6);
2169         TEST_CASE(7);
2170         TEST_CASE(8);
2171         TEST_CASE(9);
2172         TEST_CASE(10);
2173         TEST_CASE(11);
2174         TEST_CASE(12);
2175         TEST_CASE(13);
2176         TEST_CASE(14);
2177         TEST_CASE(15);
2178         TEST_CASE(16);
2179         TEST_CASE(17);
2180         TEST_CASE(18);
2181         TEST_CASE(19);
2182         TEST_CASE(20);
2183
2184 #ifdef OPENSSL_CPUID_OBJ
2185         {
2186         size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
2187         union { u64 u; u8 c[1024]; } buf;
2188         int i;
2189
2190         AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
2191         CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
2192         CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
2193
2194         CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
2195         start = OPENSSL_rdtsc();
2196         CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
2197         gcm_t = OPENSSL_rdtsc() - start;
2198
2199         CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
2200                         &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
2201                         (block128_f)AES_encrypt);
2202         start = OPENSSL_rdtsc();
2203         CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
2204                         &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
2205                         (block128_f)AES_encrypt);
2206         ctr_t = OPENSSL_rdtsc() - start;
2207
2208         printf("%.2f-%.2f=%.2f\n",
2209                         gcm_t/(double)sizeof(buf),
2210                         ctr_t/(double)sizeof(buf),
2211                         (gcm_t-ctr_t)/(double)sizeof(buf));
2212 #ifdef GHASH
2213         {
2214         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
2215                                 const u8 *inp,size_t len)       = ctx.ghash;
2216
2217         GHASH((&ctx),buf.c,sizeof(buf));
2218         start = OPENSSL_rdtsc();
2219         for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
2220         gcm_t = OPENSSL_rdtsc() - start;
2221         printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
2222         }
2223 #endif
2224         }
2225 #endif
2226
2227         return ret;
2228 }
2229 #endif