Commit: "remove OPENSSL_FIPSAPI"
File: crypto/modes/gcm128.c (from openssl.git)
1 /* ====================================================================
2  * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer. 
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  *
16  * 3. All advertising materials mentioning features or use of this
17  *    software must display the following acknowledgment:
18  *    "This product includes software developed by the OpenSSL Project
19  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20  *
21  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22  *    endorse or promote products derived from this software without
23  *    prior written permission. For written permission, please contact
24  *    openssl-core@openssl.org.
25  *
26  * 5. Products derived from this software may not be called "OpenSSL"
27  *    nor may "OpenSSL" appear in their names without prior written
28  *    permission of the OpenSSL Project.
29  *
30  * 6. Redistributions of any form whatsoever must retain the following
31  *    acknowledgment:
32  *    "This product includes software developed by the OpenSSL Project
33  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34  *
35  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
39  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46  * OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  */
49
50
51
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
54 #include <string.h>
55
56 #ifndef MODES_DEBUG
57 # ifndef NDEBUG
58 #  define NDEBUG
59 # endif
60 #endif
61 #include <assert.h>
62
63 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64 /* redefine, because alignment is ensured */
65 #undef  GETU32
66 #define GETU32(p)       BSWAP4(*(const u32 *)(p))
67 #undef  PUTU32
68 #define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
69 #endif
70
/*
 * PACK(s) places a 16-bit constant in the top 16 bits of a size_t, so the
 * same table source (rem_4bit/rem_8bit below) works for both 32- and
 * 64-bit size_t.
 */
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * REDUCE1BIT(V) shifts the 128-bit value V right by one bit in GF(2^128),
 * folding the bit shifted out back in with the field constant 0xe1...
 * (the bit-reflected GCM reduction polynomial).  The sizeof(size_t) test
 * is resolved at compile time and selects a 64-bit- or 32-bit-friendly
 * formulation of the same computation.
 */
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
        } \
} while(0)
84
85 /*
86  * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87  * never be set to 8. 8 is effectively reserved for testing purposes.
88  * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89  * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90  * whole spectrum of possible table driven implementations. Why? In
91  * non-"Shoup's" case memory access pattern is segmented in such manner,
92  * that it's trivial to see that cache timing information can reveal
93  * fair portion of intermediate hash value. Given that ciphertext is
94  * always available to attacker, it's possible for him to attempt to
95  * deduce secret parameter H and if successful, tamper with messages
96  * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97  * not as trivial, but there is no reason to believe that it's resistant
98  * to cache-timing attack. And the thing about "8-bit" implementation is
99  * that it consumes 16 (sixteen) times more memory, 4KB per individual
100  * key + 1KB shared. Well, on pros side it should be twice as fast as
101  * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102  * was observed to run ~75% faster, closer to 100% for commercial
103  * compilers... Yet "4-bit" procedure is preferred, because it's
104  * believed to provide better security-performance balance and adequate
105  * all-round performance. "All-round" refers to things like:
106  *
107  * - shorter setup time effectively improves overall timing for
108  *   handling short messages;
109  * - larger table allocation can become unbearable because of VM
110  *   subsystem penalties (for example on Windows large enough free
111  *   results in VM working set trimming, meaning that consequent
112  *   malloc would immediately incur working set expansion);
113  * - larger table has larger cache footprint, which can affect
114  *   performance of other code paths (not necessarily even from same
115  *   thread in Hyper-Threading world);
116  *
117  * Value of 1 is not appropriate for performance reasons.
118  */
119 #if     TABLE_BITS==8
120
121 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
122 {
123         int  i, j;
124         u128 V;
125
126         Htable[0].hi = 0;
127         Htable[0].lo = 0;
128         V.hi = H[0];
129         V.lo = H[1];
130
131         for (Htable[128]=V, i=64; i>0; i>>=1) {
132                 REDUCE1BIT(V);
133                 Htable[i] = V;
134         }
135
136         for (i=2; i<256; i<<=1) {
137                 u128 *Hi = Htable+i, H0 = *Hi;
138                 for (j=1; j<i; ++j) {
139                         Hi[j].hi = H0.hi^Htable[j].hi;
140                         Hi[j].lo = H0.lo^Htable[j].lo;
141                 }
142         }
143 }
144
/*
 * gcm_gmult_8bit: Xi = Xi * H in GF(2^128) using the 256-entry table
 * built by gcm_init_8bit ("8-bit Shoup" method).  Xi is interpreted as
 * a 16-byte big-endian value and updated in place, one byte per loop
 * iteration, scanning from the last byte to the first.
 */
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
        u128 Z = { 0, 0};
        const u8 *xi = (const u8 *)Xi+15;       /* start at the last byte of Xi */
        size_t rem, n = *xi;
        const union { long one; char little; } is_endian = {1};
        /*
         * rem_8bit[b]: reduction constants that fold the byte shifted
         * out of Z back into its top bits (values packed into the top
         * 16 bits of a size_t via PACK).
         */
        static const size_t rem_8bit[256] = {
                PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
                PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
                PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
                PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
                PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
                PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
                PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
                PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
                PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
                PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
                PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
                PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
                PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
                PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
                PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
                PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
                PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
                PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
                PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
                PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
                PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
                PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
                PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
                PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
                PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
                PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
                PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
                PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
                PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
                PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
                PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
                PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
                PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
                PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
                PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
                PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
                PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
                PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
                PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
                PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
                PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
                PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
                PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
                PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
                PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
                PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
                PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
                PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
                PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
                PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
                PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
                PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
                PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
                PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
                PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
                PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
                PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
                PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
                PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
                PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
                PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
                PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
                PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
                PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };

        while (1) {
                /* accumulate the Htable entry selected by the current byte */
                Z.hi ^= Htable[n].hi;
                Z.lo ^= Htable[n].lo;

                if ((u8 *)Xi==xi)       break;  /* all 16 bytes consumed */

                n = *(--xi);

                /* shift Z right by one byte, folding the ejected byte
                 * back into the top bits via rem_8bit */
                rem  = (size_t)Z.lo&0xff;
                Z.lo = (Z.hi<<56)|(Z.lo>>8);
                Z.hi = (Z.hi>>8);
                if (sizeof(size_t)==8)
                        Z.hi ^= rem_8bit[rem];
                else
                        Z.hi ^= (u64)rem_8bit[rem]<<32;
        }

        /* write Z back to Xi in big-endian byte order */
        if (is_endian.little) {
#ifdef BSWAP8
                Xi[0] = BSWAP8(Z.hi);
                Xi[1] = BSWAP8(Z.lo);
#else
                u8 *p = (u8 *)Xi;
                u32 v;
                v = (u32)(Z.hi>>32);    PUTU32(p,v);
                v = (u32)(Z.hi);        PUTU32(p+4,v);
                v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
                v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
        }
        else {
                Xi[0] = Z.hi;
                Xi[1] = Z.lo;
        }
}
252 #define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
253
254 #elif   TABLE_BITS==4
255
/*
 * gcm_init_4bit: populate the 16-entry table for the 4-bit table-driven
 * GHASH ("Shoup's" method).  Htable[8] holds H itself; each halving of
 * the index is one right shift in GF(2^128) (via REDUCE1BIT), so
 * Htable[1] = H>>3.  All other entries are XOR combinations of those
 * powers: Htable[i|j] = Htable[i] ^ Htable[j].
 */
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
        u128 V;
#if defined(OPENSSL_SMALL_FOOTPRINT)
        int  i;
#endif

        Htable[0].hi = 0;
        Htable[0].lo = 0;
        V.hi = H[0];
        V.lo = H[1];

#if defined(OPENSSL_SMALL_FOOTPRINT)
        /* loop-based construction: smaller code, same resulting table */
        for (Htable[8]=V, i=4; i>0; i>>=1) {
                REDUCE1BIT(V);
                Htable[i] = V;
        }

        for (i=2; i<16; i<<=1) {
                u128 *Hi = Htable+i;
                int   j;
                for (V=*Hi, j=1; j<i; ++j) {
                        Hi[j].hi = V.hi^Htable[j].hi;
                        Hi[j].lo = V.lo^Htable[j].lo;
                }
        }
#else
        /* fully unrolled construction of the same table */
        Htable[8] = V;
        REDUCE1BIT(V);
        Htable[4] = V;
        REDUCE1BIT(V);
        Htable[2] = V;
        REDUCE1BIT(V);
        Htable[1] = V;
        Htable[3].hi  = V.hi^Htable[2].hi, Htable[3].lo  = V.lo^Htable[2].lo;
        V=Htable[4];
        Htable[5].hi  = V.hi^Htable[1].hi, Htable[5].lo  = V.lo^Htable[1].lo;
        Htable[6].hi  = V.hi^Htable[2].hi, Htable[6].lo  = V.lo^Htable[2].lo;
        Htable[7].hi  = V.hi^Htable[3].hi, Htable[7].lo  = V.lo^Htable[3].lo;
        V=Htable[8];
        Htable[9].hi  = V.hi^Htable[1].hi, Htable[9].lo  = V.lo^Htable[1].lo;
        Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
        Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
        Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
        Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
        Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
        Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
#endif
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
        /*
         * ARM assembler expects specific dword order in Htable, so the
         * table is post-processed here: halves are swapped on little-
         * endian, 32-bit words rotated within each half otherwise.
         */
        {
        int j;
        const union { long one; char little; } is_endian = {1};

        if (is_endian.little)
                for (j=0;j<16;++j) {
                        V = Htable[j];
                        Htable[j].hi = V.lo;
                        Htable[j].lo = V.hi;
                }
        else
                for (j=0;j<16;++j) {
                        V = Htable[j];
                        Htable[j].hi = V.lo<<32|V.lo>>32;
                        Htable[j].lo = V.hi<<32|V.hi>>32;
                }
        }
#endif
}
327
328 #ifndef GHASH_ASM
/*
 * rem_4bit[n]: reduction constants for the 4-bit method — entry n
 * undoes the effect of shifting nibble n out of the low end of Z
 * (values packed into the top 16 bits of a size_t via PACK).
 */
static const size_t rem_4bit[16] = {
        PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
        PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
        PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
        PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
334
/*
 * gcm_gmult_4bit: Xi = Xi * H in GF(2^128) using the 16-entry Htable
 * built by gcm_init_4bit.  Each of Xi's 16 bytes contributes two 4-bit
 * nibbles (low nibble first), scanning from the last byte to the
 * first; rem_4bit folds the bits shifted out of Z back in.  Xi is read
 * and written in big-endian byte order.
 */
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
        u128 Z;
        int cnt = 15;
        size_t rem, nlo, nhi;
        const union { long one; char little; } is_endian = {1};

        /* seed Z from the low nibble of the last byte */
        nlo  = ((const u8 *)Xi)[15];
        nhi  = nlo>>4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
                /* shift Z right 4 bits, reduce, then fold in the high nibble */
                rem  = (size_t)Z.lo&0xf;
                Z.lo = (Z.hi<<60)|(Z.lo>>4);
                Z.hi = (Z.hi>>4);
                if (sizeof(size_t)==8)
                        Z.hi ^= rem_4bit[rem];
                else
                        Z.hi ^= (u64)rem_4bit[rem]<<32;

                Z.hi ^= Htable[nhi].hi;
                Z.lo ^= Htable[nhi].lo;

                if (--cnt<0)            break;

                nlo  = ((const u8 *)Xi)[cnt];
                nhi  = nlo>>4;
                nlo &= 0xf;

                /* same shift/reduce step for the low nibble of this byte */
                rem  = (size_t)Z.lo&0xf;
                Z.lo = (Z.hi<<60)|(Z.lo>>4);
                Z.hi = (Z.hi>>4);
                if (sizeof(size_t)==8)
                        Z.hi ^= rem_4bit[rem];
                else
                        Z.hi ^= (u64)rem_4bit[rem]<<32;

                Z.hi ^= Htable[nlo].hi;
                Z.lo ^= Htable[nlo].lo;
        }

        /* write Z back to Xi in big-endian byte order */
        if (is_endian.little) {
#ifdef BSWAP8
                Xi[0] = BSWAP8(Z.hi);
                Xi[1] = BSWAP8(Z.lo);
#else
                u8 *p = (u8 *)Xi;
                u32 v;
                v = (u32)(Z.hi>>32);    PUTU32(p,v);
                v = (u32)(Z.hi);        PUTU32(p+4,v);
                v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
                v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
        }
        else {
                Xi[0] = Z.hi;
                Xi[1] = Z.lo;
        }
}
397
398 #if !defined(OPENSSL_SMALL_FOOTPRINT)
399 /*
400  * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
401  * details... Compiler-generated code doesn't seem to give any
402  * performance improvement, at least not on x86[_64]. It's here
403  * mostly as reference and a placeholder for possible future
404  * non-trivial optimization[s]...
405  */
/*
 * gcm_ghash_4bit: GHASH an input buffer: for each 16-byte block,
 * Xi = (Xi ^ block) * H, using the 4-bit table method.  The do/while
 * consumes exactly 16 bytes per pass, so the caller is expected to
 * pass len as a non-zero multiple of 16.
 */
static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
                                const u8 *inp,size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    const union { long one; char little; } is_endian = {1};

#if 1
    do {
        /* XOR the input block into Xi on the fly, then multiply by H
         * nibble-by-nibble exactly as in gcm_gmult_4bit */
        cnt  = 15;
        nlo  = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi  = nlo>>4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
                /* shift Z right 4 bits, reduce, fold in the high nibble */
                rem  = (size_t)Z.lo&0xf;
                Z.lo = (Z.hi<<60)|(Z.lo>>4);
                Z.hi = (Z.hi>>4);
                if (sizeof(size_t)==8)
                        Z.hi ^= rem_4bit[rem];
                else
                        Z.hi ^= (u64)rem_4bit[rem]<<32;

                Z.hi ^= Htable[nhi].hi;
                Z.lo ^= Htable[nhi].lo;

                if (--cnt<0)            break;

                nlo  = ((const u8 *)Xi)[cnt];
                nlo ^= inp[cnt];
                nhi  = nlo>>4;
                nlo &= 0xf;

                /* same shift/reduce step for the low nibble of this byte */
                rem  = (size_t)Z.lo&0xf;
                Z.lo = (Z.hi<<60)|(Z.lo>>4);
                Z.hi = (Z.hi>>4);
                if (sizeof(size_t)==8)
                        Z.hi ^= rem_4bit[rem];
                else
                        Z.hi ^= (u64)rem_4bit[rem]<<32;

                Z.hi ^= Htable[nlo].hi;
                Z.lo ^= Htable[nlo].lo;
        }
#else
    /*
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
     * [should] give ~50% improvement... One could have PACK()-ed
     * the rem_8bit even here, but the priority is to minimize
     * cache footprint...
     */
    u128 Hshr4[16];     /* Htable shifted right by 4 bits */
    u8   Hshl4[16];     /* Htable shifted left  by 4 bits */
    static const unsigned short rem_8bit[256] = {
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
    /*
     * This pre-processing phase slows down procedure by approximately
     * same time as it makes each loop spin faster. In other words
     * single block performance is approximately same as straightforward
     * "4-bit" implementation, and then it goes only faster...
     */
    for (cnt=0; cnt<16; ++cnt) {
        Z.hi = Htable[cnt].hi;
        Z.lo = Htable[cnt].lo;
        Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
        Hshr4[cnt].hi = (Z.hi>>4);
        Hshl4[cnt]    = (u8)(Z.lo<<4);
    }

    do {
        for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
                nlo  = ((const u8 *)Xi)[cnt];
                nlo ^= inp[cnt];
                nhi  = nlo>>4;
                nlo &= 0xf;

                Z.hi ^= Htable[nlo].hi;
                Z.lo ^= Htable[nlo].lo;

                rem = (size_t)Z.lo&0xff;

                Z.lo = (Z.hi<<56)|(Z.lo>>8);
                Z.hi = (Z.hi>>8);

                Z.hi ^= Hshr4[nhi].hi;
                Z.lo ^= Hshr4[nhi].lo;
                Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
        }

        nlo  = ((const u8 *)Xi)[0];
        nlo ^= inp[0];
        nhi  = nlo>>4;
        nlo &= 0xf;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;

        rem = (size_t)Z.lo&0xf;

        Z.lo = (Z.hi<<60)|(Z.lo>>4);
        Z.hi = (Z.hi>>4);

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;
        Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
#endif

        /* write the updated hash value back in big-endian byte order */
        if (is_endian.little) {
#ifdef BSWAP8
                Xi[0] = BSWAP8(Z.hi);
                Xi[1] = BSWAP8(Z.lo);
#else
                u8 *p = (u8 *)Xi;
                u32 v;
                v = (u32)(Z.hi>>32);    PUTU32(p,v);
                v = (u32)(Z.hi);        PUTU32(p+4,v);
                v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
                v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
        }
        else {
                Xi[0] = Z.hi;
                Xi[1] = Z.lo;
        }
    } while (inp+=16, len-=16);
}
568 #endif
569 #else
570 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
571 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
572 #endif
573
574 #define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
575 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
576 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
577 /* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
578  * trashing effect. In other words idea is to hash data while it's
579  * still in L1 cache after encryption pass... */
580 #define GHASH_CHUNK       (3*1024)
581 #endif
582
583 #else   /* TABLE_BITS */
584
585 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
586 {
587         u128 V,Z = { 0,0 };
588         long X;
589         int  i,j;
590         const long *xi = (const long *)Xi;
591         const union { long one; char little; } is_endian = {1};
592
593         V.hi = H[0];    /* H is in host byte order, no byte swapping */
594         V.lo = H[1];
595
596         for (j=0; j<16/sizeof(long); ++j) {
597                 if (is_endian.little) {
598                         if (sizeof(long)==8) {
599 #ifdef BSWAP8
600                                 X = (long)(BSWAP8(xi[j]));
601 #else
602                                 const u8 *p = (const u8 *)(xi+j);
603                                 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
604 #endif
605                         }
606                         else {
607                                 const u8 *p = (const u8 *)(xi+j);
608                                 X = (long)GETU32(p);
609                         }
610                 }
611                 else
612                         X = xi[j];
613
614                 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
615                         u64 M = (u64)(X>>(8*sizeof(long)-1));
616                         Z.hi ^= V.hi&M;
617                         Z.lo ^= V.lo&M;
618
619                         REDUCE1BIT(V);
620                 }
621         }
622
623         if (is_endian.little) {
624 #ifdef BSWAP8
625                 Xi[0] = BSWAP8(Z.hi);
626                 Xi[1] = BSWAP8(Z.lo);
627 #else
628                 u8 *p = (u8 *)Xi;
629                 u32 v;
630                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
631                 v = (u32)(Z.hi);        PUTU32(p+4,v);
632                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
633                 v = (u32)(Z.lo);        PUTU32(p+12,v);
634 #endif
635         }
636         else {
637                 Xi[0] = Z.hi;
638                 Xi[1] = Z.lo;
639         }
640 }
641 #define GCM_MUL(ctx,Xi)   gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
642
643 #endif
644
645 #if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
646 # if    !defined(I386_ONLY) && \
647         (defined(__i386)        || defined(__i386__)    || \
648          defined(__x86_64)      || defined(__x86_64__)  || \
649          defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
650 #  define GHASH_ASM_X86_OR_64
651 #  define GCM_FUNCREF_4BIT
652 extern unsigned int OPENSSL_ia32cap_P[2];
653
654 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
655 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
656 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
657
658 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
659 # define gcm_init_avx   gcm_init_clmul
660 # define gcm_gmult_avx  gcm_gmult_clmul
661 # define gcm_ghash_avx  gcm_ghash_clmul
662 #else
663 void gcm_init_avx(u128 Htable[16],const u64 Xi[2]);
664 void gcm_gmult_avx(u64 Xi[2],const u128 Htable[16]);
665 void gcm_ghash_avx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
666 #endif
667
668 #  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
669 #   define GHASH_ASM_X86
670 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
671 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
672
673 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
674 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
675 #  endif
676 # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
677 #  include "arm_arch.h"
678 #  if __ARM_ARCH__>=7
679 #   define GHASH_ASM_ARM
680 #   define GCM_FUNCREF_4BIT
681 #   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
682 #   if defined(__arm__) || defined(__arm)
683 #    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
684 #   endif
685 void gcm_init_neon(u128 Htable[16],const u64 Xi[2]);
686 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
687 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
688 void gcm_init_v8(u128 Htable[16],const u64 Xi[2]);
689 void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]);
690 void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
691 #  endif
692 # elif defined(__sparc__) || defined(__sparc)
693 #  include "sparc_arch.h"
694 #  define GHASH_ASM_SPARC
695 #  define GCM_FUNCREF_4BIT
696 extern unsigned int OPENSSL_sparcv9cap_P[];
697 void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
698 void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
699 void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
700 #elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
701 #  include "ppc_arch.h"
702 #  define GHASH_ASM_PPC
703 #  define GCM_FUNCREF_4BIT
704 void gcm_init_p8(u128 Htable[16],const u64 Xi[2]);
705 void gcm_gmult_p8(u64 Xi[2],const u128 Htable[16]);
706 void gcm_ghash_p8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
707 # endif
708 #endif
709
710 #ifdef GCM_FUNCREF_4BIT
711 # undef  GCM_MUL
712 # define GCM_MUL(ctx,Xi)        (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
713 # ifdef GHASH
714 #  undef  GHASH
715 #  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
716 # endif
717 #endif
718
/*
 * Initialize |ctx| for GCM over the raw block cipher |block| keyed by
 * |key|: zero the context, derive the hash subkey H = E_K(0^128),
 * convert H to host byte order on little-endian machines, and build the
 * GHASH lookup table.  Where platform-specific assembler is compiled in,
 * also select the fastest available gmult/ghash implementations based on
 * runtime CPU capability flags.
 */
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
{
        /* run-time endianness probe: .little is non-zero on LE hosts */
        const union { long one; char little; } is_endian = {1};

        memset(ctx,0,sizeof(*ctx));
        ctx->block = block;
        ctx->key   = key;

        /* H = E_K(0^128); ctx->H.c is all zeros after the memset above */
        (*block)(ctx->H.c,ctx->H.c,key);

        if (is_endian.little) {
                /* H is stored in host byte order */
#ifdef BSWAP8
                ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
                ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
                /* no 64-bit byte swap available: rebuild both halves
                 * from big-endian 32-bit loads */
                u8 *p = ctx->H.c;
                u64 hi,lo;
                hi = (u64)GETU32(p)  <<32|GETU32(p+4);
                lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
                ctx->H.u[0] = hi;
                ctx->H.u[1] = lo;
#endif
        }

#if     TABLE_BITS==8
        gcm_init_8bit(ctx->Htable,ctx->H.u);
#elif   TABLE_BITS==4
# if    defined(GHASH_ASM_X86_OR_64)
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
        if (OPENSSL_ia32cap_P[0]&(1<<24) &&     /* check FXSR bit */
            OPENSSL_ia32cap_P[1]&(1<<1) ) {     /* check PCLMULQDQ bit */
                if (((OPENSSL_ia32cap_P[1]>>22)&0x41)==0x41) {  /* AVX+MOVBE */
                        gcm_init_avx(ctx->Htable,ctx->H.u);
                        ctx->gmult = gcm_gmult_avx;
                        ctx->ghash = gcm_ghash_avx;
                } else {
                        gcm_init_clmul(ctx->Htable,ctx->H.u);
                        ctx->gmult = gcm_gmult_clmul;
                        ctx->ghash = gcm_ghash_clmul;
                }
                /* carry-less multiply path selected; 4-bit table not needed */
                return;
        }
#  endif
        gcm_init_4bit(ctx->Htable,ctx->H.u);
#  if   defined(GHASH_ASM_X86)                  /* x86 only */
#   if  defined(OPENSSL_IA32_SSE2)
        if (OPENSSL_ia32cap_P[0]&(1<<25)) {     /* check SSE bit */
#   else
        if (OPENSSL_ia32cap_P[0]&(1<<23)) {     /* check MMX bit */
#   endif
                ctx->gmult = gcm_gmult_4bit_mmx;
                ctx->ghash = gcm_ghash_4bit_mmx;
        } else {
                ctx->gmult = gcm_gmult_4bit_x86;
                ctx->ghash = gcm_ghash_4bit_x86;
        }
#  else
        ctx->gmult = gcm_gmult_4bit;
        ctx->ghash = gcm_ghash_4bit;
#  endif
# elif  defined(GHASH_ASM_ARM)
#  ifdef PMULL_CAPABLE
        if (PMULL_CAPABLE) {
                gcm_init_v8(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_v8;
                ctx->ghash = gcm_ghash_v8;
        } else
#  endif
#  ifdef NEON_CAPABLE
        if (NEON_CAPABLE) {
                gcm_init_neon(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_neon;
                ctx->ghash = gcm_ghash_neon;
        } else
#  endif
        {
                /* generic C fallback */
                gcm_init_4bit(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_4bit;
                ctx->ghash = gcm_ghash_4bit;
        }
# elif  defined(GHASH_ASM_SPARC)
        if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
                gcm_init_vis3(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_vis3;
                ctx->ghash = gcm_ghash_vis3;
        } else {
                gcm_init_4bit(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_4bit;
                ctx->ghash = gcm_ghash_4bit;
        }
# elif  defined(GHASH_ASM_PPC)
        if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
                gcm_init_p8(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_p8;
                ctx->ghash = gcm_ghash_p8;
        } else {
                gcm_init_4bit(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_4bit;
                ctx->ghash = gcm_ghash_4bit;
        }
# else
        gcm_init_4bit(ctx->Htable,ctx->H.u);
# endif
#endif
}
825
/*
 * Install the IV and reset all per-message state (AAD/message lengths,
 * GHASH accumulator Xi, residue counters).  Per the GCM spec a 96-bit
 * IV is used directly as Y0 = IV || 0^31 || 1; any other length is
 * folded through GHASH together with its bit length to derive Y0.
 * Finally EK0 = E_K(Y0) is cached for the tag computation and the
 * counter block Yi is advanced to Y1.
 */
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
{
        const union { long one; char little; } is_endian = {1};
        unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
#endif

        ctx->Yi.u[0]  = 0;
        ctx->Yi.u[1]  = 0;
        ctx->Xi.u[0]  = 0;
        ctx->Xi.u[1]  = 0;
        ctx->len.u[0] = 0;      /* AAD length */
        ctx->len.u[1] = 0;      /* message length */
        ctx->ares = 0;
        ctx->mres = 0;

        if (len==12) {
                /* fast path: Y0 = IV || 0^31 || 1 */
                memcpy(ctx->Yi.c,iv,12);
                ctx->Yi.c[15]=1;
                ctr=1;
        }
        else {
                /* Y0 = GHASH(IV padded to a block boundary || len(IV) in bits) */
                size_t i;
                u64 len0 = len;

                while (len>=16) {
                        for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
                        GCM_MUL(ctx,Yi);
                        iv += 16;
                        len -= 16;
                }
                if (len) {
                        /* final partial block, implicitly zero-padded */
                        for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
                        GCM_MUL(ctx,Yi);
                }
                len0 <<= 3;     /* IV length in bits */
                if (is_endian.little) {
#ifdef BSWAP8
                        ctx->Yi.u[1]  ^= BSWAP8(len0);
#else
                        /* fold in big-endian length byte by byte */
                        ctx->Yi.c[8]  ^= (u8)(len0>>56);
                        ctx->Yi.c[9]  ^= (u8)(len0>>48);
                        ctx->Yi.c[10] ^= (u8)(len0>>40);
                        ctx->Yi.c[11] ^= (u8)(len0>>32);
                        ctx->Yi.c[12] ^= (u8)(len0>>24);
                        ctx->Yi.c[13] ^= (u8)(len0>>16);
                        ctx->Yi.c[14] ^= (u8)(len0>>8);
                        ctx->Yi.c[15] ^= (u8)(len0);
#endif
                }
                else
                        ctx->Yi.u[1]  ^= len0;

                GCM_MUL(ctx,Yi);

                /* extract the initial 32-bit counter from the derived Y0 */
                if (is_endian.little)
#ifdef BSWAP4
                        ctr = BSWAP4(ctx->Yi.d[3]);
#else
                        ctr = GETU32(ctx->Yi.c+12);
#endif
                else
                        ctr = ctx->Yi.d[3];
        }

        /* EK0 = E_K(Y0), later xored into Xi to form the tag */
        (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
        ++ctr;
        if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12,ctr);
#endif
        else
                ctx->Yi.d[3] = ctr;
}
903
/*
 * Feed additional authenticated data into the GHASH accumulator.
 * May be called multiple times, but only before any encrypt/decrypt
 * call (returns -2 once message data has been processed).  Returns -1
 * if the accumulated AAD length exceeds the GCM limit of 2^64 bits
 * (2^61 bytes), 0 on success.  Partial-block residue is carried in
 * ctx->ares between calls.
 */
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
{
        size_t i;
        unsigned int n;
        u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
# ifdef GHASH
        void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                                const u8 *inp,size_t len)       = ctx->ghash;
# endif
#endif

        /* AAD must precede all message data */
        if (ctx->len.u[1]) return -2;

        alen += len;
        /* reject overflow past 2^61 bytes (2^64 bits) or size_t wrap */
        if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
                return -1;
        ctx->len.u[0] = alen;

        /* top up a partial block left over from a previous call */
        n = ctx->ares;
        if (n) {
                while (n && len) {
                        ctx->Xi.c[n] ^= *(aad++);
                        --len;
                        n = (n+1)%16;
                }
                if (n==0) GCM_MUL(ctx,Xi);
                else {
                        /* still short of a full block; stash residue */
                        ctx->ares = n;
                        return 0;
                }
        }

#ifdef GHASH
        /* hash all whole blocks in one assembler call */
        if ((i = (len&(size_t)-16))) {
                GHASH(ctx,aad,i);
                aad += i;
                len -= i;
        }
#else
        while (len>=16) {
                for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
                GCM_MUL(ctx,Xi);
                aad += 16;
                len -= 16;
        }
#endif
        /* xor in trailing partial block; multiply deferred to next call */
        if (len) {
                n = (unsigned int)len;
                for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
        }

        ctx->ares = n;
        return 0;
}
960
/*
 * Encrypt |len| bytes from |in| to |out| in GCM: CTR-mode encryption
 * interleaved with GHASH over the produced ciphertext.  May be called
 * repeatedly; partial-block keystream residue is carried in ctx->mres.
 * Returns -1 if the total message length exceeds the GCM limit of
 * 2^36-32 bytes, 0 on success.
 */
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                const unsigned char *in, unsigned char *out,
                size_t len)
{
        const union { long one; char little; } is_endian = {1};
        unsigned int n, ctr;
        size_t i;
        u64        mlen  = ctx->len.u[1];
        block128_f block = ctx->block;
        void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
# ifdef GHASH
        void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                                const u8 *inp,size_t len)       = ctx->ghash;
# endif
#endif

#if 0
        n = (unsigned int)mlen%16; /* alternative to ctx->mres */
#endif
        mlen += len;
        /* enforce NIST message-length limit and guard size_t wrap */
        if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
                return -1;
        ctx->len.u[1] = mlen;

        if (ctx->ares) {
                /* First call to encrypt finalizes GHASH(AAD) */
                GCM_MUL(ctx,Xi);
                ctx->ares = 0;
        }

        /* pull the 32-bit counter out of the big-endian counter block */
        if (is_endian.little)
#ifdef BSWAP4
                ctr = BSWAP4(ctx->Yi.d[3]);
#else
                ctr = GETU32(ctx->Yi.c+12);
#endif
        else
                ctr = ctx->Yi.d[3];

        n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
        if (16%sizeof(size_t) == 0) do {        /* always true actually */
                /* consume leftover keystream from the previous call */
                if (n) {
                        while (n && len) {
                                ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
                                --len;
                                n = (n+1)%16;
                        }
                        if (n==0) GCM_MUL(ctx,Xi);
                        else {
                                ctx->mres = n;
                                return 0;
                        }
                }
#if defined(STRICT_ALIGNMENT)
                /* word-at-a-time path below needs aligned pointers */
                if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
                        break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
                /* bulk path: encrypt a chunk, then GHASH it in one call */
                while (len>=GHASH_CHUNK) {
                    size_t j=GHASH_CHUNK;

                    while (j) {
                        size_t *out_t=(size_t *)out;
                        const size_t *in_t=(const size_t *)in;

                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        /* xor keystream word-wise through the EKi union */
                        for (i=0; i<16/sizeof(size_t); ++i)
                                out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                        out += 16;
                        in  += 16;
                        j   -= 16;
                    }
                    GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
                    len -= GHASH_CHUNK;
                }
                /* remaining whole blocks */
                if ((i = (len&(size_t)-16))) {
                    size_t j=i;

                    while (len>=16) {
                        size_t *out_t=(size_t *)out;
                        const size_t *in_t=(const size_t *)in;

                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
                                out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                        out += 16;
                        in  += 16;
                        len -= 16;
                    }
                    GHASH(ctx,out-j,j);
                }
#else
                /* no bulk GHASH: multiply after every block */
                while (len>=16) {
                        size_t *out_t=(size_t *)out;
                        const size_t *in_t=(const size_t *)in;

                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
                                ctx->Xi.t[i] ^=
                                out_t[i] = in_t[i]^ctx->EKi.t[i];
                        GCM_MUL(ctx,Xi);
                        out += 16;
                        in  += 16;
                        len -= 16;
                }
#endif
                /* trailing partial block: keep unused keystream in EKi */
                if (len) {
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        while (len--) {
                                ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
                                ++n;
                        }
                }

                ctx->mres = n;
                return 0;
        } while(0);
#endif
        /* byte-at-a-time fallback (small footprint or unaligned input) */
        for (i=0;i<len;++i) {
                if (n==0) {
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                }
                ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
                n = (n+1)%16;
                if (n==0)
                        GCM_MUL(ctx,Xi);
        }

        ctx->mres = n;
        return 0;
}
1140
/*
 * Decrypt |len| bytes from |in| to |out| in GCM.  Mirror image of
 * CRYPTO_gcm128_encrypt, except GHASH is computed over the incoming
 * ciphertext (before it is xored with the keystream).  Partial-block
 * residue is carried in ctx->mres.  Returns -1 on message-length
 * overflow, 0 on success.
 */
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                const unsigned char *in, unsigned char *out,
                size_t len)
{
        const union { long one; char little; } is_endian = {1};
        unsigned int n, ctr;
        size_t i;
        u64        mlen  = ctx->len.u[1];
        block128_f block = ctx->block;
        void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
# ifdef GHASH
        void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                                const u8 *inp,size_t len)       = ctx->ghash;
# endif
#endif

        mlen += len;
        /* enforce NIST message-length limit and guard size_t wrap */
        if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
                return -1;
        ctx->len.u[1] = mlen;

        if (ctx->ares) {
                /* First call to decrypt finalizes GHASH(AAD) */
                GCM_MUL(ctx,Xi);
                ctx->ares = 0;
        }

        if (is_endian.little)
#ifdef BSWAP4
                ctr = BSWAP4(ctx->Yi.d[3]);
#else
                ctr = GETU32(ctx->Yi.c+12);
#endif
        else
                ctr = ctx->Yi.d[3];

        n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
        if (16%sizeof(size_t) == 0) do {        /* always true actually */
                /* consume leftover keystream from the previous call;
                 * note ciphertext byte |c| is saved before overwrite so
                 * in==out (in-place) operation works */
                if (n) {
                        while (n && len) {
                                u8 c = *(in++);
                                *(out++) = c^ctx->EKi.c[n];
                                ctx->Xi.c[n] ^= c;
                                --len;
                                n = (n+1)%16;
                        }
                        if (n==0) GCM_MUL (ctx,Xi);
                        else {
                                ctx->mres = n;
                                return 0;
                        }
                }
#if defined(STRICT_ALIGNMENT)
                /* word-at-a-time path below needs aligned pointers */
                if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
                        break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
                /* bulk path: GHASH the ciphertext chunk first, then decrypt */
                while (len>=GHASH_CHUNK) {
                    size_t j=GHASH_CHUNK;

                    GHASH(ctx,in,GHASH_CHUNK);
                    while (j) {
                        size_t *out_t=(size_t *)out;
                        const size_t *in_t=(const size_t *)in;

                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
                                out_t[i] = in_t[i]^ctx->EKi.t[i];
                        out += 16;
                        in  += 16;
                        j   -= 16;
                    }
                    len -= GHASH_CHUNK;
                }
                /* remaining whole blocks */
                if ((i = (len&(size_t)-16))) {
                    GHASH(ctx,in,i);
                    while (len>=16) {
                        size_t *out_t=(size_t *)out;
                        const size_t *in_t=(const size_t *)in;

                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
                                out_t[i] = in_t[i]^ctx->EKi.t[i];
                        out += 16;
                        in  += 16;
                        len -= 16;
                    }
                }
#else
                /* no bulk GHASH: multiply after every block */
                while (len>=16) {
                        size_t *out_t=(size_t *)out;
                        const size_t *in_t=(const size_t *)in;

                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i) {
                                size_t c = in[i];
                                out[i] = c^ctx->EKi.t[i];
                                ctx->Xi.t[i] ^= c;
                        }
                        GCM_MUL(ctx,Xi);
                        out += 16;
                        in  += 16;
                        len -= 16;
                }
#endif
                /* trailing partial block: keep unused keystream in EKi */
                if (len) {
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        while (len--) {
                                u8 c = in[n];
                                ctx->Xi.c[n] ^= c;
                                out[n] = c^ctx->EKi.c[n];
                                ++n;
                        }
                }

                ctx->mres = n;
                return 0;
        } while(0);
#endif
        /* byte-at-a-time fallback (small footprint or unaligned input) */
        for (i=0;i<len;++i) {
                u8 c;
                if (n==0) {
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                }
                c = in[i];
                out[i] = c^ctx->EKi.c[n];
                ctx->Xi.c[n] ^= c;
                n = (n+1)%16;
                if (n==0)
                        GCM_MUL(ctx,Xi);
        }

        ctx->mres = n;
        return 0;
}
1324
/*
 * Encrypt |len| bytes using the caller-supplied counter-mode routine
 * |stream|, which processes multiple blocks per call (e.g. AES-NI CTR
 * assembler).  GHASH is applied to the produced ciphertext.  Residue
 * handling and limits are identical to CRYPTO_gcm128_encrypt.
 * Returns -1 on message-length overflow, 0 on success.
 */
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                const unsigned char *in, unsigned char *out,
                size_t len, ctr128_f stream)
{
        const union { long one; char little; } is_endian = {1};
        unsigned int n, ctr;
        size_t i;
        u64   mlen = ctx->len.u[1];
        void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
# ifdef GHASH
        void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                                const u8 *inp,size_t len)       = ctx->ghash;
# endif
#endif

        mlen += len;
        /* enforce NIST message-length limit and guard size_t wrap */
        if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
                return -1;
        ctx->len.u[1] = mlen;

        if (ctx->ares) {
                /* First call to encrypt finalizes GHASH(AAD) */
                GCM_MUL(ctx,Xi);
                ctx->ares = 0;
        }

        if (is_endian.little)
#ifdef BSWAP4
                ctr = BSWAP4(ctx->Yi.d[3]);
#else
                ctr = GETU32(ctx->Yi.c+12);
#endif
        else
                ctr = ctx->Yi.d[3];

        /* consume leftover keystream from the previous call */
        n = ctx->mres;
        if (n) {
                while (n && len) {
                        ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
                        --len;
                        n = (n+1)%16;
                }
                if (n==0) GCM_MUL(ctx,Xi);
                else {
                        ctx->mres = n;
                        return 0;
                }
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        /* bulk path: |stream| encrypts a whole chunk, then GHASH it */
        while (len>=GHASH_CHUNK) {
                (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
                ctr += GHASH_CHUNK/16;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                GHASH(ctx,out,GHASH_CHUNK);
                out += GHASH_CHUNK;
                in  += GHASH_CHUNK;
                len -= GHASH_CHUNK;
        }
#endif
        /* remaining whole blocks */
        if ((i = (len&(size_t)-16))) {
                size_t j=i/16;

                (*stream)(in,out,j,key,ctx->Yi.c);
                ctr += (unsigned int)j;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                in  += i;
                len -= i;
#if defined(GHASH)
                GHASH(ctx,out,i);
                out += i;
#else
                while (j--) {
                        for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
                        GCM_MUL(ctx,Xi);
                        out += 16;
                }
#endif
        }
        /* trailing partial block via a single plain block call */
        if (len) {
                (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                while (len--) {
                        ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
                        ++n;
                }
        }

        ctx->mres = n;
        return 0;
}
1439
/*
 * Decrypt |len| bytes using the caller-supplied counter-mode routine
 * |stream|.  Mirror image of CRYPTO_gcm128_encrypt_ctr32: GHASH is
 * applied to the incoming ciphertext before it is decrypted, which
 * also makes in-place (in==out) operation safe.  Returns -1 on
 * message-length overflow, 0 on success.
 */
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                const unsigned char *in, unsigned char *out,
                size_t len,ctr128_f stream)
{
        const union { long one; char little; } is_endian = {1};
        unsigned int n, ctr;
        size_t i;
        u64   mlen = ctx->len.u[1];
        void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
# ifdef GHASH
        void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                                const u8 *inp,size_t len)       = ctx->ghash;
# endif
#endif

        mlen += len;
        /* enforce NIST message-length limit and guard size_t wrap */
        if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
                return -1;
        ctx->len.u[1] = mlen;

        if (ctx->ares) {
                /* First call to decrypt finalizes GHASH(AAD) */
                GCM_MUL(ctx,Xi);
                ctx->ares = 0;
        }

        if (is_endian.little)
#ifdef BSWAP4
                ctr = BSWAP4(ctx->Yi.d[3]);
#else
                ctr = GETU32(ctx->Yi.c+12);
#endif
        else
                ctr = ctx->Yi.d[3];

        /* consume leftover keystream from the previous call */
        n = ctx->mres;
        if (n) {
                while (n && len) {
                        u8 c = *(in++);
                        *(out++) = c^ctx->EKi.c[n];
                        ctx->Xi.c[n] ^= c;
                        --len;
                        n = (n+1)%16;
                }
                if (n==0) GCM_MUL (ctx,Xi);
                else {
                        ctx->mres = n;
                        return 0;
                }
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        /* bulk path: GHASH the ciphertext chunk, then let |stream| decrypt */
        while (len>=GHASH_CHUNK) {
                GHASH(ctx,in,GHASH_CHUNK);
                (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
                ctr += GHASH_CHUNK/16;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                out += GHASH_CHUNK;
                in  += GHASH_CHUNK;
                len -= GHASH_CHUNK;
        }
#endif
        /* remaining whole blocks */
        if ((i = (len&(size_t)-16))) {
                size_t j=i/16;

#if defined(GHASH)
                GHASH(ctx,in,i);
#else
                while (j--) {
                        size_t k;
                        for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
                        GCM_MUL(ctx,Xi);
                        in += 16;
                }
                /* restore j and in for the stream call below */
                j   = i/16;
                in -= i;
#endif
                (*stream)(in,out,j,key,ctx->Yi.c);
                ctr += (unsigned int)j;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                out += i;
                in  += i;
                len -= i;
        }
        /* trailing partial block via a single plain block call */
        if (len) {
                (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                while (len--) {
                        u8 c = in[n];
                        ctx->Xi.c[n] ^= c;
                        out[n] = c^ctx->EKi.c[n];
                        ++n;
                }
        }

        ctx->mres = n;
        return 0;
}
1561
/*
 * Finalize GHASH: flush any AAD/message residue, fold in the 64-bit
 * AAD and ciphertext lengths (in bits), and xor with EK0 to form the
 * authentication tag in ctx->Xi.  If |tag| is non-NULL, compare the
 * first |len| bytes against it and return memcmp's result (0 on match).
 * Returns -1 when |tag| is NULL or |len| exceeds the tag size.
 * NOTE(review): the comparison uses plain memcmp, i.e. it is not
 * constant-time; callers are expected to account for that.
 */
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
                        size_t len)
{
        const union { long one; char little; } is_endian = {1};
        u64 alen = ctx->len.u[0]<<3;    /* AAD length in bits */
        u64 clen = ctx->len.u[1]<<3;    /* ciphertext length in bits */
#ifdef GCM_FUNCREF_4BIT
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
#endif

        /* flush any pending partial-block residue into Xi */
        if (ctx->mres || ctx->ares)
                GCM_MUL(ctx,Xi);

        if (is_endian.little) {
#ifdef BSWAP8
                alen = BSWAP8(alen);
                clen = BSWAP8(clen);
#else
                /* byte-swap via the len union; len.u is no longer needed */
                u8 *p = ctx->len.c;

                ctx->len.u[0] = alen;
                ctx->len.u[1] = clen;

                alen = (u64)GETU32(p)  <<32|GETU32(p+4);
                clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
#endif
        }

        /* final GHASH block: len(A) || len(C) */
        ctx->Xi.u[0] ^= alen;
        ctx->Xi.u[1] ^= clen;
        GCM_MUL(ctx,Xi);

        /* tag = GHASH result xor E_K(Y0) */
        ctx->Xi.u[0] ^= ctx->EK0.u[0];
        ctx->Xi.u[1] ^= ctx->EK0.u[1];

        if (tag && len<=sizeof(ctx->Xi))
                return memcmp(ctx->Xi.c,tag,len);
        else
                return -1;
}
1602
1603 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1604 {
1605         CRYPTO_gcm128_finish(ctx, NULL, 0);
1606         memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
1607 }
1608
1609 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1610 {
1611         GCM128_CONTEXT *ret;
1612
1613         if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1614                 CRYPTO_gcm128_init(ret,key,block);
1615
1616         return ret;
1617 }
1618
1619 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1620 {
1621         if (ctx) {
1622                 OPENSSL_cleanse(ctx,sizeof(*ctx));
1623                 OPENSSL_free(ctx);
1624         }
1625 }
1626
1627 #if defined(SELFTEST)
1628 #include <stdio.h>
1629 #include <openssl/aes.h>
1630
/* Test Case 1: all-zero 128-bit key, zero 96-bit IV, empty plaintext
 * and AAD; only the tag T1 is checked.  Vectors presumably follow the
 * published GCM specification test cases -- verify against the spec. */
static const u8 K1[16],
                *P1=NULL,
                *A1=NULL,
                IV1[12],
                *C1=NULL,
                T1[]=  {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
1638
1639 /* Test Case 2 */
1640 #define K2 K1
1641 #define A2 A1
1642 #define IV2 IV1
1643 static const u8 P2[16],
1644                 C2[]=  {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1645                 T2[]=  {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1646
1647 /* Test Case 3 */
1648 #define A3 A2
1649 static const u8 K3[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1650                 P3[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1651                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1652                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1653                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1654                 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1655                 C3[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1656                         0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1657                         0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1658                         0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1659                 T3[]=  {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
1660
1661 /* Test Case 4 */
1662 #define K4 K3
1663 #define IV4 IV3
1664 static const u8 P4[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1665                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1666                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1667                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1668                 A4[]=  {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1669                         0xab,0xad,0xda,0xd2},
1670                 C4[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1671                         0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1672                         0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1673                         0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1674                 T4[]=  {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1675
1676 /* Test Case 5 */
1677 #define K5 K4
1678 #define P5 P4
1679 #define A5 A4
1680 static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1681                 C5[]=  {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1682                         0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1683                         0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1684                         0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1685                 T5[]=  {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
1686
1687 /* Test Case 6 */
1688 #define K6 K5
1689 #define P6 P5
1690 #define A6 A5
1691 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1692                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1693                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1694                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1695                 C6[]=  {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1696                         0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1697                         0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1698                         0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1699                 T6[]=  {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1700
1701 /* Test Case 7 */
1702 static const u8 K7[24],
1703                 *P7=NULL,
1704                 *A7=NULL,
1705                 IV7[12],
1706                 *C7=NULL,
1707                 T7[]=  {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1708
1709 /* Test Case 8 */
1710 #define K8 K7
1711 #define IV8 IV7
1712 #define A8 A7
1713 static const u8 P8[16],
1714                 C8[]=  {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1715                 T8[]=  {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1716
1717 /* Test Case 9 */
1718 #define A9 A8
1719 static const u8 K9[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1720                         0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1721                 P9[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1722                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1723                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1724                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1725                 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1726                 C9[]=  {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1727                         0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1728                         0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1729                         0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1730                 T9[]=  {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1731
1732 /* Test Case 10 */
1733 #define K10 K9
1734 #define IV10 IV9
1735 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1736                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1737                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1738                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1739                 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1740                         0xab,0xad,0xda,0xd2},
1741                 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1742                         0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1743                         0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1744                         0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1745                 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1746
1747 /* Test Case 11 */
1748 #define K11 K10
1749 #define P11 P10
1750 #define A11 A10
1751 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1752                 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1753                         0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1754                         0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1755                         0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1756                 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1757
1758 /* Test Case 12 */
1759 #define K12 K11
1760 #define P12 P11
1761 #define A12 A11
1762 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1763                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1764                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1765                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1766                 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1767                         0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1768                         0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1769                         0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1770                 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1771
1772 /* Test Case 13 */
1773 static const u8 K13[32],
1774                 *P13=NULL,
1775                 *A13=NULL,
1776                 IV13[12],
1777                 *C13=NULL,
1778                 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1779
1780 /* Test Case 14 */
1781 #define K14 K13
1782 #define A14 A13
1783 static const u8 P14[16],
1784                 IV14[12],
1785                 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1786                 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1787
1788 /* Test Case 15 */
1789 #define A15 A14
1790 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1791                         0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1792                 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1793                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1794                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1795                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1796                 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1797                 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1798                         0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1799                         0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1800                         0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1801                 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1802
1803 /* Test Case 16 */
1804 #define K16 K15
1805 #define IV16 IV15
1806 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1807                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1808                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1809                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1810                 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1811                         0xab,0xad,0xda,0xd2},
1812                 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1813                         0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1814                         0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1815                         0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1816                 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1817
1818 /* Test Case 17 */
1819 #define K17 K16
1820 #define P17 P16
1821 #define A17 A16
1822 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1823                 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1824                         0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1825                         0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1826                         0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1827                 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1828
1829 /* Test Case 18 */
1830 #define K18 K17
1831 #define P18 P17
1832 #define A18 A17
1833 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1834                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1835                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1836                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1837                 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1838                         0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1839                         0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1840                         0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1841                 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1842
1843 /* Test Case 19 */
1844 #define K19 K1
1845 #define P19 P1
1846 #define IV19 IV1
1847 #define C19 C1
1848 static const u8 A19[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1849                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1850                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1851                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
1852                         0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1853                         0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1854                         0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1855                         0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1856                 T19[]= {0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92};
1857
1858 /* Test Case 20 */
1859 #define K20 K1
1860 #define A20 A1
1861 static const u8 IV20[64]={0xff,0xff,0xff,0xff}, /* this results in 0xff in counter LSB */
1862                 P20[288],
1863                 C20[]= {0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
1864                         0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
1865                         0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
1866                         0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
1867                         0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
1868                         0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
1869                         0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
1870                         0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
1871                         0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
1872                         0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
1873                         0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
1874                         0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
1875                         0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
1876                         0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
1877                         0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
1878                         0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
1879                         0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
1880                         0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c},
1881                 T20[]= {0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f};
1882
/*
 * TEST_CASE(n): run one known-answer test in both directions.
 * Token-pastes K##n / IV##n / A##n / P##n / C##n / T##n to select the
 * vector set.  Encrypt pass: init context with the key, set the IV,
 * feed AAD and plaintext if present, then verify the 16-byte tag via
 * CRYPTO_gcm128_finish() and compare the ciphertext.  Decrypt pass:
 * re-set the IV and do the reverse, comparing against the plaintext.
 * On any mismatch, increments `ret` and prints the failing case.
 * Relies on `ctx`, `key` and `ret` from the invoking scope (main).
 */
#define TEST_CASE(n)	do {					\
	u8 out[sizeof(P##n)];					\
	AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);		\
	CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);	\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));		\
	memset(out,0,sizeof(out));				\
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));	\
	if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));	\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||		\
	    (C##n && memcmp(out,C##n,sizeof(out))))		\
		ret++, printf ("encrypt test#%d failed.\n",n);	\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));		\
	memset(out,0,sizeof(out));				\
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));	\
	if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));	\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||		\
	    (P##n && memcmp(out,P##n,sizeof(out))))		\
		ret++, printf ("decrypt test#%d failed.\n",n);	\
	} while(0)
1902
/*
 * SELFTEST driver: runs the 20 GCM known-answer tests and, when the
 * CPUID object is available, a rough cycle-count benchmark comparing
 * full GCM against bare CTR (and GHASH alone when GHASH is defined).
 * Returns the number of failed sub-tests (0 on success).
 */
int main()
{
	GCM128_CONTEXT ctx;
	AES_KEY key;
	int ret=0;	/* failure count, bumped inside TEST_CASE */

	/* Known-answer tests over 128/192/256-bit keys and assorted
	 * IV/AAD/plaintext shapes (see vector definitions above). */
	TEST_CASE(1);
	TEST_CASE(2);
	TEST_CASE(3);
	TEST_CASE(4);
	TEST_CASE(5);
	TEST_CASE(6);
	TEST_CASE(7);
	TEST_CASE(8);
	TEST_CASE(9);
	TEST_CASE(10);
	TEST_CASE(11);
	TEST_CASE(12);
	TEST_CASE(13);
	TEST_CASE(14);
	TEST_CASE(15);
	TEST_CASE(16);
	TEST_CASE(17);
	TEST_CASE(18);
	TEST_CASE(19);
	TEST_CASE(20);

#ifdef OPENSSL_CPUID_OBJ
	{
	/* NOTE(review): `stop` is declared but never used. */
	size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
	union { u64 u; u8 c[1024]; } buf;	/* u forces alignment of c */
	int i;

	AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
	CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
	CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));

	/* First call warms caches; only the second is timed. */
	CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
	start = OPENSSL_rdtsc();
	CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
	gcm_t = OPENSSL_rdtsc() - start;

	/* Same warm-up-then-time pattern for plain CTR mode. */
	CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
			&key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
			(block128_f)AES_encrypt);
	start = OPENSSL_rdtsc();
	CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
			&key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
			(block128_f)AES_encrypt);
	ctr_t = OPENSSL_rdtsc() - start;

	/* Cycles per byte: GCM, CTR, and their difference (GHASH cost). */
	printf("%.2f-%.2f=%.2f\n",
			gcm_t/(double)sizeof(buf),
			ctr_t/(double)sizeof(buf),
			(gcm_t-ctr_t)/(double)sizeof(buf));
#ifdef GHASH
	{
	/* Local ghash pointer so the GHASH macro resolves to ctx's hook. */
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx.ghash;

	/* Warm up, then average 100 timed GHASH passes. */
	GHASH((&ctx),buf.c,sizeof(buf));
	start = OPENSSL_rdtsc();
	for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
	gcm_t = OPENSSL_rdtsc() - start;
	printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
	}
#endif
	}
#endif

	return ret;
}
1975 #endif