crypto/modes/gcm128.c
1 /* ====================================================================
2  * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer. 
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  *
16  * 3. All advertising materials mentioning features or use of this
17  *    software must display the following acknowledgment:
18  *    "This product includes software developed by the OpenSSL Project
19  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20  *
21  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22  *    endorse or promote products derived from this software without
23  *    prior written permission. For written permission, please contact
24  *    openssl-core@openssl.org.
25  *
26  * 5. Products derived from this software may not be called "OpenSSL"
27  *    nor may "OpenSSL" appear in their names without prior written
28  *    permission of the OpenSSL Project.
29  *
30  * 6. Redistributions of any form whatsoever must retain the following
31  *    acknowledgment:
32  *    "This product includes software developed by the OpenSSL Project
33  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34  *
35  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
39  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46  * OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  */
49
50 #define OPENSSL_FIPSAPI
51
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
54 #include <string.h>
55
56 #ifndef MODES_DEBUG
57 # ifndef NDEBUG
58 #  define NDEBUG
59 # endif
60 #endif
61 #include <assert.h>
62
63 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64 /* redefine, because alignment is ensured */
65 #undef  GETU32
66 #define GETU32(p)       BSWAP4(*(const u32 *)(p))
67 #undef  PUTU32
68 #define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
69 #endif
70
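/*
 * PACK() positions a 16-bit reduction constant in the most significant
 * 16 bits of a size_t, so that the same rem_4bit/rem_8bit tables work
 * on both 32- and 64-bit targets. REDUCE1BIT() shifts the 128-bit value
 * V right by one bit and, if a bit falls off the low end, XORs the GCM
 * reduction constant 0xE1 into the top byte; in GCM's bit-reflected
 * representation this is multiplication by x in GF(2^128).
 */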
71 #define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
72 #define REDUCE1BIT(V)   do { \
73         if (sizeof(size_t)==8) { \
74                 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75                 V.lo  = (V.hi<<63)|(V.lo>>1); \
76                 V.hi  = (V.hi>>1 )^T; \
77         } \
78         else { \
79                 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80                 V.lo  = (V.hi<<63)|(V.lo>>1); \
81                 V.hi  = (V.hi>>1 )^((u64)T<<32); \
82         } \
83 } while(0)
84
85 /*
86  * Although the permitted values for TABLE_BITS are 8, 4 and 1, it should
87  * never be set to 8 in production; 8 is effectively reserved for testing.
88  * TABLE_BITS>1 selects the lookup-table-driven implementations referred
89  * to as "Shoup's" in the GCM specification; OpenSSL deliberately does
90  * not cover the whole spectrum of possible table-driven implementations.
91  * Why? In the non-"Shoup's" case the memory access pattern is segmented
92  * in such a way that cache-timing information can reveal a fair portion
93  * of the intermediate hash value. Given that the ciphertext is always
94  * available to an attacker, this could let the attacker deduce the
95  * secret parameter H and, if successful, tamper with messages [which is
96  * trivial in CTR mode]. In the "Shoup's" case the attack is not as
97  * straightforward, but there is no reason to believe it is resistant to
98  * cache-timing attacks either. As for the "8-bit" implementation, it
99  * consumes 16 (sixteen) times more memory, 4KB per individual key +
100  * 1KB shared. On the plus side it should be roughly twice as fast as
101  * the "4-bit" version; for gcc-generated x86[_64] code the "8-bit"
102  * version was observed to run ~75% faster, closer to 100% with
103  * commercial compilers... Yet the "4-bit" procedure is preferred,
104  * because it is believed to offer a better security/performance balance
105  * and adequate all-round performance. "All-round" refers to things like:
106  *
107  * - shorter setup time, which improves overall timing when handling
108  *   short messages;
109  * - a larger table allocation can become unbearable because of VM
110  *   subsystem penalties (for example on Windows a large enough free()
111  *   results in working-set trimming, so a subsequent malloc() would
112  *   immediately incur working-set expansion);
113  * - a larger table has a larger cache footprint, which can affect the
114  *   performance of other code paths (not necessarily even in the same
115  *   thread in a Hyper-Threading world).
116  *
117  * A value of 1 is not appropriate, for performance reasons.
118  */
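/*
 * For reference, the table sizes work out as follows: with TABLE_BITS==4,
 * Htable holds 16 u128 entries = 256 bytes per key plus the shared
 * rem_4bit table of 16 size_t entries; with TABLE_BITS==8, Htable holds
 * 256 u128 entries = 4KB per key plus the shared 256-entry rem_8bit
 * table, which is the "16 times more memory" mentioned above.
 */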
119 #if     TABLE_BITS==8
120
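/*
 * Precompute the 256-entry lookup table used by the "8-bit" GHASH path.
 * Entries at power-of-two indices are derived from H by repeated
 * REDUCE1BIT (multiplication by x), and the remaining entries are XOR
 * combinations of those, so that one table lookup handles a full byte
 * of input per iteration.
 */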
121 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
122 {
123         int  i, j;
124         u128 V;
125
126         Htable[0].hi = 0;
127         Htable[0].lo = 0;
128         V.hi = H[0];
129         V.lo = H[1];
130
131         for (Htable[128]=V, i=64; i>0; i>>=1) {
132                 REDUCE1BIT(V);
133                 Htable[i] = V;
134         }
135
136         for (i=2; i<256; i<<=1) {
137                 u128 *Hi = Htable+i, H0 = *Hi;
138                 for (j=1; j<i; ++j) {
139                         Hi[j].hi = H0.hi^Htable[j].hi;
140                         Hi[j].lo = H0.lo^Htable[j].lo;
141                 }
142         }
143 }
144
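/*
 * Multiply Xi by H using the 8-bit table: Xi is processed one byte at a
 * time starting from the last byte, each step combining a table lookup
 * with an 8-bit shift of the accumulator and a reduction folded in via
 * the shared rem_8bit table. The result is stored back into Xi as a
 * big-endian byte string.
 */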
145 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
146 {
147         u128 Z = { 0, 0};
148         const u8 *xi = (const u8 *)Xi+15;
149         size_t rem, n = *xi;
150         const union { long one; char little; } is_endian = {1};
151         static const size_t rem_8bit[256] = {
152                 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
153                 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
154                 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
155                 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
156                 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
157                 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
158                 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
159                 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
160                 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
161                 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
162                 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
163                 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
164                 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
165                 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
166                 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
167                 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
168                 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
169                 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
170                 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
171                 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
172                 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
173                 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
174                 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
175                 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
176                 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
177                 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
178                 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
179                 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
180                 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
181                 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
182                 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
183                 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
184                 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
185                 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
186                 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
187                 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
188                 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
189                 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
190                 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
191                 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
192                 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
193                 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
194                 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
195                 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
196                 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
197                 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
198                 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
199                 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
200                 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
201                 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
202                 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
203                 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
204                 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
205                 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
206                 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
207                 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
208                 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
209                 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
210                 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
211                 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
212                 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
213                 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
214                 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
215                 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
216
217         while (1) {
218                 Z.hi ^= Htable[n].hi;
219                 Z.lo ^= Htable[n].lo;
220
221                 if ((u8 *)Xi==xi)       break;
222
223                 n = *(--xi);
224
225                 rem  = (size_t)Z.lo&0xff;
226                 Z.lo = (Z.hi<<56)|(Z.lo>>8);
227                 Z.hi = (Z.hi>>8);
228                 if (sizeof(size_t)==8)
229                         Z.hi ^= rem_8bit[rem];
230                 else
231                         Z.hi ^= (u64)rem_8bit[rem]<<32;
232         }
233
234         if (is_endian.little) {
235 #ifdef BSWAP8
236                 Xi[0] = BSWAP8(Z.hi);
237                 Xi[1] = BSWAP8(Z.lo);
238 #else
239                 u8 *p = (u8 *)Xi;
240                 u32 v;
241                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
242                 v = (u32)(Z.hi);        PUTU32(p+4,v);
243                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
244                 v = (u32)(Z.lo);        PUTU32(p+12,v);
245 #endif
246         }
247         else {
248                 Xi[0] = Z.hi;
249                 Xi[1] = Z.lo;
250         }
251 }
252 #define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
253
254 #elif   TABLE_BITS==4
255
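/*
 * Precompute the 16-entry lookup table for the "4-bit" GHASH path:
 * Htable[8], Htable[4], Htable[2] and Htable[1] are successive
 * multiples of H by x, and the remaining entries are XOR combinations
 * of those, so each table lookup handles one nibble of input.
 */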
256 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
257 {
258         u128 V;
259 #if defined(OPENSSL_SMALL_FOOTPRINT)
260         int  i;
261 #endif
262
263         Htable[0].hi = 0;
264         Htable[0].lo = 0;
265         V.hi = H[0];
266         V.lo = H[1];
267
268 #if defined(OPENSSL_SMALL_FOOTPRINT)
269         for (Htable[8]=V, i=4; i>0; i>>=1) {
270                 REDUCE1BIT(V);
271                 Htable[i] = V;
272         }
273
274         for (i=2; i<16; i<<=1) {
275                 u128 *Hi = Htable+i;
276                 int   j;
277                 for (V=*Hi, j=1; j<i; ++j) {
278                         Hi[j].hi = V.hi^Htable[j].hi;
279                         Hi[j].lo = V.lo^Htable[j].lo;
280                 }
281         }
282 #else
283         Htable[8] = V;
284         REDUCE1BIT(V);
285         Htable[4] = V;
286         REDUCE1BIT(V);
287         Htable[2] = V;
288         REDUCE1BIT(V);
289         Htable[1] = V;
290         Htable[3].hi  = V.hi^Htable[2].hi, Htable[3].lo  = V.lo^Htable[2].lo;
291         V=Htable[4];
292         Htable[5].hi  = V.hi^Htable[1].hi, Htable[5].lo  = V.lo^Htable[1].lo;
293         Htable[6].hi  = V.hi^Htable[2].hi, Htable[6].lo  = V.lo^Htable[2].lo;
294         Htable[7].hi  = V.hi^Htable[3].hi, Htable[7].lo  = V.lo^Htable[3].lo;
295         V=Htable[8];
296         Htable[9].hi  = V.hi^Htable[1].hi, Htable[9].lo  = V.lo^Htable[1].lo;
297         Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
298         Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
299         Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
300         Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
301         Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
302         Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
303 #endif
304 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
305         /*
306          * ARM assembler expects specific dword order in Htable.
307          */
308         {
309         int j;
310         const union { long one; char little; } is_endian = {1};
311
312         if (is_endian.little)
313                 for (j=0;j<16;++j) {
314                         V = Htable[j];
315                         Htable[j].hi = V.lo;
316                         Htable[j].lo = V.hi;
317                 }
318         else
319                 for (j=0;j<16;++j) {
320                         V = Htable[j];
321                         Htable[j].hi = V.lo<<32|V.lo>>32;
322                         Htable[j].lo = V.hi<<32|V.hi>>32;
323                 }
324         }
325 #endif
326 }
327
328 #ifndef GHASH_ASM
329 static const size_t rem_4bit[16] = {
330         PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
331         PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
332         PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
333         PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
334
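/*
 * Portable C implementation of the single-block GHASH multiplication
 * Xi = Xi*H driven by the 16-entry Htable: each iteration consumes one
 * byte of Xi as two 4-bit indices, shifting the accumulator four bits
 * at a time and folding the reduction in via rem_4bit.
 */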
335 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
336 {
337         u128 Z;
338         int cnt = 15;
339         size_t rem, nlo, nhi;
340         const union { long one; char little; } is_endian = {1};
341
342         nlo  = ((const u8 *)Xi)[15];
343         nhi  = nlo>>4;
344         nlo &= 0xf;
345
346         Z.hi = Htable[nlo].hi;
347         Z.lo = Htable[nlo].lo;
348
349         while (1) {
350                 rem  = (size_t)Z.lo&0xf;
351                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
352                 Z.hi = (Z.hi>>4);
353                 if (sizeof(size_t)==8)
354                         Z.hi ^= rem_4bit[rem];
355                 else
356                         Z.hi ^= (u64)rem_4bit[rem]<<32;
357
358                 Z.hi ^= Htable[nhi].hi;
359                 Z.lo ^= Htable[nhi].lo;
360
361                 if (--cnt<0)            break;
362
363                 nlo  = ((const u8 *)Xi)[cnt];
364                 nhi  = nlo>>4;
365                 nlo &= 0xf;
366
367                 rem  = (size_t)Z.lo&0xf;
368                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
369                 Z.hi = (Z.hi>>4);
370                 if (sizeof(size_t)==8)
371                         Z.hi ^= rem_4bit[rem];
372                 else
373                         Z.hi ^= (u64)rem_4bit[rem]<<32;
374
375                 Z.hi ^= Htable[nlo].hi;
376                 Z.lo ^= Htable[nlo].lo;
377         }
378
379         if (is_endian.little) {
380 #ifdef BSWAP8
381                 Xi[0] = BSWAP8(Z.hi);
382                 Xi[1] = BSWAP8(Z.lo);
383 #else
384                 u8 *p = (u8 *)Xi;
385                 u32 v;
386                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
387                 v = (u32)(Z.hi);        PUTU32(p+4,v);
388                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
389                 v = (u32)(Z.lo);        PUTU32(p+12,v);
390 #endif
391         }
392         else {
393                 Xi[0] = Z.hi;
394                 Xi[1] = Z.lo;
395         }
396 }
397
398 #if !defined(OPENSSL_SMALL_FOOTPRINT)
399 /*
400  * Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt
401  * for how it is used. The compiler-generated code doesn't seem to give
402  * any performance improvement, at least not on x86[_64]. It's here
403  * mostly as a reference and a placeholder for possible future
404  * non-trivial optimization[s]...
405  */
406 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
407                                 const u8 *inp,size_t len)
408 {
409     u128 Z;
410     int cnt;
411     size_t rem, nlo, nhi;
412     const union { long one; char little; } is_endian = {1};
413
414 #if 1
415     do {
416         cnt  = 15;
417         nlo  = ((const u8 *)Xi)[15];
418         nlo ^= inp[15];
419         nhi  = nlo>>4;
420         nlo &= 0xf;
421
422         Z.hi = Htable[nlo].hi;
423         Z.lo = Htable[nlo].lo;
424
425         while (1) {
426                 rem  = (size_t)Z.lo&0xf;
427                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
428                 Z.hi = (Z.hi>>4);
429                 if (sizeof(size_t)==8)
430                         Z.hi ^= rem_4bit[rem];
431                 else
432                         Z.hi ^= (u64)rem_4bit[rem]<<32;
433
434                 Z.hi ^= Htable[nhi].hi;
435                 Z.lo ^= Htable[nhi].lo;
436
437                 if (--cnt<0)            break;
438
439                 nlo  = ((const u8 *)Xi)[cnt];
440                 nlo ^= inp[cnt];
441                 nhi  = nlo>>4;
442                 nlo &= 0xf;
443
444                 rem  = (size_t)Z.lo&0xf;
445                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
446                 Z.hi = (Z.hi>>4);
447                 if (sizeof(size_t)==8)
448                         Z.hi ^= rem_4bit[rem];
449                 else
450                         Z.hi ^= (u64)rem_4bit[rem]<<32;
451
452                 Z.hi ^= Htable[nlo].hi;
453                 Z.lo ^= Htable[nlo].lo;
454         }
455 #else
456     /*
457      * An extra 256+16 bytes per key plus a 512-byte shared table
458      * [should] give a ~50% improvement... One could have PACK()-ed
459      * rem_8bit even here, but the priority is to minimize the
460      * cache footprint...
461      */
462     u128 Hshr4[16];     /* Htable shifted right by 4 bits */
463     u8   Hshl4[16];     /* Htable shifted left  by 4 bits */
464     static const unsigned short rem_8bit[256] = {
465         0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
466         0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
467         0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
468         0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
469         0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
470         0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
471         0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
472         0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
473         0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
474         0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
475         0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
476         0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
477         0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
478         0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
479         0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
480         0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
481         0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
482         0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
483         0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
484         0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
485         0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
486         0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
487         0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
488         0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
489         0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
490         0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
491         0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
492         0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
493         0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
494         0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
495         0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
496         0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
497     /*
498      * This pre-processing phase slows the procedure down by roughly the
499      * same amount of time as it saves per loop iteration. In other words,
500      * single-block performance is about the same as the straightforward
501      * "4-bit" implementation, and from there it only gets faster...
502      */
503     for (cnt=0; cnt<16; ++cnt) {
504         Z.hi = Htable[cnt].hi;
505         Z.lo = Htable[cnt].lo;
506         Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
507         Hshr4[cnt].hi = (Z.hi>>4);
508         Hshl4[cnt]    = (u8)(Z.lo<<4);
509     }
510
511     do {
512         for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
513                 nlo  = ((const u8 *)Xi)[cnt];
514                 nlo ^= inp[cnt];
515                 nhi  = nlo>>4;
516                 nlo &= 0xf;
517
518                 Z.hi ^= Htable[nlo].hi;
519                 Z.lo ^= Htable[nlo].lo;
520
521                 rem = (size_t)Z.lo&0xff;
522
523                 Z.lo = (Z.hi<<56)|(Z.lo>>8);
524                 Z.hi = (Z.hi>>8);
525
526                 Z.hi ^= Hshr4[nhi].hi;
527                 Z.lo ^= Hshr4[nhi].lo;
528                 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
529         }
530
531         nlo  = ((const u8 *)Xi)[0];
532         nlo ^= inp[0];
533         nhi  = nlo>>4;
534         nlo &= 0xf;
535
536         Z.hi ^= Htable[nlo].hi;
537         Z.lo ^= Htable[nlo].lo;
538
539         rem = (size_t)Z.lo&0xf;
540
541         Z.lo = (Z.hi<<60)|(Z.lo>>4);
542         Z.hi = (Z.hi>>4);
543
544         Z.hi ^= Htable[nhi].hi;
545         Z.lo ^= Htable[nhi].lo;
546         Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
547 #endif
548
549         if (is_endian.little) {
550 #ifdef BSWAP8
551                 Xi[0] = BSWAP8(Z.hi);
552                 Xi[1] = BSWAP8(Z.lo);
553 #else
554                 u8 *p = (u8 *)Xi;
555                 u32 v;
556                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
557                 v = (u32)(Z.hi);        PUTU32(p+4,v);
558                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
559                 v = (u32)(Z.lo);        PUTU32(p+12,v);
560 #endif
561         }
562         else {
563                 Xi[0] = Z.hi;
564                 Xi[1] = Z.lo;
565         }
566     } while (inp+=16, len-=16);
567 }
568 #endif
569 #else
570 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
571 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
572 #endif
573
574 #define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
575 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
576 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
577 /* GHASH_CHUNK is a "stride parameter" intended to mitigate the cache
578  * thrashing effect. In other words, the idea is to hash data while it
579  * is still in the L1 cache after the encryption pass... */
580 #define GHASH_CHUNK       (3*1024)
581 #endif
582
583 #else   /* TABLE_BITS */
584
585 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
586 {
587         u128 V,Z = { 0,0 };
588         long X;
589         int  i,j;
590         const long *xi = (const long *)Xi;
591         const union { long one; char little; } is_endian = {1};
592
593         V.hi = H[0];    /* H is in host byte order, no byte swapping */
594         V.lo = H[1];
595
596         for (j=0; j<16/sizeof(long); ++j) {
597                 if (is_endian.little) {
598                         if (sizeof(long)==8) {
599 #ifdef BSWAP8
600                                 X = (long)(BSWAP8(xi[j]));
601 #else
602                                 const u8 *p = (const u8 *)(xi+j);
603                                 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
604 #endif
605                         }
606                         else {
607                                 const u8 *p = (const u8 *)(xi+j);
608                                 X = (long)GETU32(p);
609                         }
610                 }
611                 else
612                         X = xi[j];
613
614                 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
615                         u64 M = (u64)(X>>(8*sizeof(long)-1));
616                         Z.hi ^= V.hi&M;
617                         Z.lo ^= V.lo&M;
618
619                         REDUCE1BIT(V);
620                 }
621         }
622
623         if (is_endian.little) {
624 #ifdef BSWAP8
625                 Xi[0] = BSWAP8(Z.hi);
626                 Xi[1] = BSWAP8(Z.lo);
627 #else
628                 u8 *p = (u8 *)Xi;
629                 u32 v;
630                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
631                 v = (u32)(Z.hi);        PUTU32(p+4,v);
632                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
633                 v = (u32)(Z.lo);        PUTU32(p+12,v);
634 #endif
635         }
636         else {
637                 Xi[0] = Z.hi;
638                 Xi[1] = Z.lo;
639         }
640 }
641 #define GCM_MUL(ctx,Xi)   gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
642
643 #endif
644
645 #if     TABLE_BITS==4 && defined(GHASH_ASM)
646 # if    !defined(I386_ONLY) && \
647         (defined(__i386)        || defined(__i386__)    || \
648          defined(__x86_64)      || defined(__x86_64__)  || \
649          defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
650 #  define GHASH_ASM_X86_OR_64
651 #  define GCM_FUNCREF_4BIT
652 extern unsigned int OPENSSL_ia32cap_P[2];
653
654 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
655 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
656 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
657
658 #  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
659 #   define GHASH_ASM_X86
660 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
661 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
662
663 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
664 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
665 #  endif
666 # elif defined(__arm__) || defined(__arm)
667 #  include "arm_arch.h"
668 #  if __ARM_ARCH__>=7
669 #   define GHASH_ASM_ARM
670 #   define GCM_FUNCREF_4BIT
671 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
672 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
673 #  endif
674 # endif
675 #endif
676
677 #ifdef GCM_FUNCREF_4BIT
678 # undef  GCM_MUL
679 # define GCM_MUL(ctx,Xi)        (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
680 # ifdef GHASH
681 #  undef  GHASH
682 #  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
683 # endif
684 #endif
685
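/*
 * Initialize a GCM context for the given block cipher: H = E_K(0^128)
 * is computed with the supplied block function, converted to host byte
 * order and used to precompute Htable. On x86/x86_64 and ARM builds
 * with assembler support, the gmult/ghash function pointers are chosen
 * at run time according to CPU capabilities (PCLMULQDQ, SSE/MMX, NEON).
 */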
686 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
687 {
688         const union { long one; char little; } is_endian = {1};
689
690         memset(ctx,0,sizeof(*ctx));
691         ctx->block = block;
692         ctx->key   = key;
693
694         (*block)(ctx->H.c,ctx->H.c,key);
695
696         if (is_endian.little) {
697                 /* H is stored in host byte order */
698 #ifdef BSWAP8
699                 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
700                 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
701 #else
702                 u8 *p = ctx->H.c;
703                 u64 hi,lo;
704                 hi = (u64)GETU32(p)  <<32|GETU32(p+4);
705                 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
706                 ctx->H.u[0] = hi;
707                 ctx->H.u[1] = lo;
708 #endif
709         }
710
711 #if     TABLE_BITS==8
712         gcm_init_8bit(ctx->Htable,ctx->H.u);
713 #elif   TABLE_BITS==4
714 # if    defined(GHASH_ASM_X86_OR_64)
715 #  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
716         if (OPENSSL_ia32cap_P[0]&(1<<24) &&     /* check FXSR bit */
717             OPENSSL_ia32cap_P[1]&(1<<1) ) {     /* check PCLMULQDQ bit */
718                 gcm_init_clmul(ctx->Htable,ctx->H.u);
719                 ctx->gmult = gcm_gmult_clmul;
720                 ctx->ghash = gcm_ghash_clmul;
721                 return;
722         }
723 #  endif
724         gcm_init_4bit(ctx->Htable,ctx->H.u);
725 #  if   defined(GHASH_ASM_X86)                  /* x86 only */
726 #   if  defined(OPENSSL_IA32_SSE2)
727         if (OPENSSL_ia32cap_P[0]&(1<<25)) {     /* check SSE bit */
728 #   else
729         if (OPENSSL_ia32cap_P[0]&(1<<23)) {     /* check MMX bit */
730 #   endif
731                 ctx->gmult = gcm_gmult_4bit_mmx;
732                 ctx->ghash = gcm_ghash_4bit_mmx;
733         } else {
734                 ctx->gmult = gcm_gmult_4bit_x86;
735                 ctx->ghash = gcm_ghash_4bit_x86;
736         }
737 #  else
738         ctx->gmult = gcm_gmult_4bit;
739         ctx->ghash = gcm_ghash_4bit;
740 #  endif
741 # elif  defined(GHASH_ASM_ARM)
742         if (OPENSSL_armcap_P & ARMV7_NEON) {
743                 ctx->gmult = gcm_gmult_neon;
744                 ctx->ghash = gcm_ghash_neon;
745         } else {
746                 gcm_init_4bit(ctx->Htable,ctx->H.u);
747                 ctx->gmult = gcm_gmult_4bit;
748                 ctx->ghash = gcm_ghash_4bit;
749         }
750 # else
751         gcm_init_4bit(ctx->Htable,ctx->H.u);
752 # endif
753 #endif
754 }
755
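/*
 * Set the IV and reset the per-message state. For the recommended
 * 96-bit IV the initial counter block is IV || 0^31 || 1; for any other
 * length it is GHASH(IV padded to a full block, followed by the 64-bit
 * bit length), as GCM specifies. EK0, the encryption of the initial
 * counter block, is kept for the final tag computation.
 */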
756 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
757 {
758         const union { long one; char little; } is_endian = {1};
759         unsigned int ctr;
760 #ifdef GCM_FUNCREF_4BIT
761         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
762 #endif
763
764         ctx->Yi.u[0]  = 0;
765         ctx->Yi.u[1]  = 0;
766         ctx->Xi.u[0]  = 0;
767         ctx->Xi.u[1]  = 0;
768         ctx->len.u[0] = 0;      /* AAD length */
769         ctx->len.u[1] = 0;      /* message length */
770         ctx->ares = 0;
771         ctx->mres = 0;
772
773         if (len==12) {
774                 memcpy(ctx->Yi.c,iv,12);
775                 ctx->Yi.c[15]=1;
776                 ctr=1;
777         }
778         else {
779                 size_t i;
780                 u64 len0 = len;
781
782                 while (len>=16) {
783                         for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
784                         GCM_MUL(ctx,Yi);
785                         iv += 16;
786                         len -= 16;
787                 }
788                 if (len) {
789                         for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
790                         GCM_MUL(ctx,Yi);
791                 }
792                 len0 <<= 3;
793                 if (is_endian.little) {
794 #ifdef BSWAP8
795                         ctx->Yi.u[1]  ^= BSWAP8(len0);
796 #else
797                         ctx->Yi.c[8]  ^= (u8)(len0>>56);
798                         ctx->Yi.c[9]  ^= (u8)(len0>>48);
799                         ctx->Yi.c[10] ^= (u8)(len0>>40);
800                         ctx->Yi.c[11] ^= (u8)(len0>>32);
801                         ctx->Yi.c[12] ^= (u8)(len0>>24);
802                         ctx->Yi.c[13] ^= (u8)(len0>>16);
803                         ctx->Yi.c[14] ^= (u8)(len0>>8);
804                         ctx->Yi.c[15] ^= (u8)(len0);
805 #endif
806                 }
807                 else
808                         ctx->Yi.u[1]  ^= len0;
809
810                 GCM_MUL(ctx,Yi);
811
812                 if (is_endian.little)
813                         ctr = GETU32(ctx->Yi.c+12);
814                 else
815                         ctr = ctx->Yi.d[3];
816         }
817
818         (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
819         ++ctr;
820         if (is_endian.little)
821                 PUTU32(ctx->Yi.c+12,ctr);
822         else
823                 ctx->Yi.d[3] = ctr;
824 }
825
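/*
 * Feed additional authenticated data into the GHASH state. All AAD must
 * be supplied before the first encrypt/decrypt call (-2 is returned
 * once message data has been processed), and the total AAD length may
 * not exceed 2^61 bytes (-1 otherwise). A partial final block is
 * accumulated into Xi with its length tracked in ctx->ares. Returns 0
 * on success.
 */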
826 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
827 {
828         size_t i;
829         unsigned int n;
830         u64 alen = ctx->len.u[0];
831 #ifdef GCM_FUNCREF_4BIT
832         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
833 # ifdef GHASH
834         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
835                                 const u8 *inp,size_t len)       = ctx->ghash;
836 # endif
837 #endif
838
839         if (ctx->len.u[1]) return -2;
840
841         alen += len;
842         if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
843                 return -1;
844         ctx->len.u[0] = alen;
845
846         n = ctx->ares;
847         if (n) {
848                 while (n && len) {
849                         ctx->Xi.c[n] ^= *(aad++);
850                         --len;
851                         n = (n+1)%16;
852                 }
853                 if (n==0) GCM_MUL(ctx,Xi);
854                 else {
855                         ctx->ares = n;
856                         return 0;
857                 }
858         }
859
860 #ifdef GHASH
861         if ((i = (len&(size_t)-16))) {
862                 GHASH(ctx,aad,i);
863                 aad += i;
864                 len -= i;
865         }
866 #else
867         while (len>=16) {
868                 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
869                 GCM_MUL(ctx,Xi);
870                 aad += 16;
871                 len -= 16;
872         }
873 #endif
874         if (len) {
875                 n = (unsigned int)len;
876                 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
877         }
878
879         ctx->ares = n;
880         return 0;
881 }
882
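/*
 * Encrypt len bytes in CTR mode, using ctx->block to generate the
 * keystream, and absorb the resulting ciphertext into the GHASH state.
 * The total message length is limited to 2^36-32 bytes as required by
 * GCM, and calls need not be block aligned since partial blocks are
 * carried over in ctx->mres. Returns 0 on success, -1 if the length
 * limit is exceeded.
 */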
883 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
884                 const unsigned char *in, unsigned char *out,
885                 size_t len)
886 {
887         const union { long one; char little; } is_endian = {1};
888         unsigned int n, ctr;
889         size_t i;
890         u64        mlen  = ctx->len.u[1];
891         block128_f block = ctx->block;
892         void      *key   = ctx->key;
893 #ifdef GCM_FUNCREF_4BIT
894         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
895 # ifdef GHASH
896         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
897                                 const u8 *inp,size_t len)       = ctx->ghash;
898 # endif
899 #endif
900
901 #if 0
902         n = (unsigned int)mlen%16; /* alternative to ctx->mres */
903 #endif
904         mlen += len;
905         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
906                 return -1;
907         ctx->len.u[1] = mlen;
908
909         if (ctx->ares) {
910                 /* First call to encrypt finalizes GHASH(AAD) */
911                 GCM_MUL(ctx,Xi);
912                 ctx->ares = 0;
913         }
914
915         if (is_endian.little)
916                 ctr = GETU32(ctx->Yi.c+12);
917         else
918                 ctr = ctx->Yi.d[3];
919
920         n = ctx->mres;
921 #if !defined(OPENSSL_SMALL_FOOTPRINT)
922         if (16%sizeof(size_t) == 0) do {        /* always true actually */
923                 if (n) {
924                         while (n && len) {
925                                 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
926                                 --len;
927                                 n = (n+1)%16;
928                         }
929                         if (n==0) GCM_MUL(ctx,Xi);
930                         else {
931                                 ctx->mres = n;
932                                 return 0;
933                         }
934                 }
935 #if defined(STRICT_ALIGNMENT)
936                 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
937                         break;
938 #endif
939 #if defined(GHASH) && defined(GHASH_CHUNK)
940                 while (len>=GHASH_CHUNK) {
941                     size_t j=GHASH_CHUNK;
942
943                     while (j) {
944                         size_t *out_t=(size_t *)out;
945                         const size_t *in_t=(const size_t *)in;
946
947                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
948                         ++ctr;
949                         if (is_endian.little)
950                                 PUTU32(ctx->Yi.c+12,ctr);
951                         else
952                                 ctx->Yi.d[3] = ctr;
953                         for (i=0; i<16/sizeof(size_t); ++i)
954                                 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
955                         out += 16;
956                         in  += 16;
957                         j   -= 16;
958                     }
959                     GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
960                     len -= GHASH_CHUNK;
961                 }
962                 if ((i = (len&(size_t)-16))) {
963                     size_t j=i;
964
965                     while (len>=16) {
966                         size_t *out_t=(size_t *)out;
967                         const size_t *in_t=(const size_t *)in;
968
969                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
970                         ++ctr;
971                         if (is_endian.little)
972                                 PUTU32(ctx->Yi.c+12,ctr);
973                         else
974                                 ctx->Yi.d[3] = ctr;
975                         for (i=0; i<16/sizeof(size_t); ++i)
976                                 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
977                         out += 16;
978                         in  += 16;
979                         len -= 16;
980                     }
981                     GHASH(ctx,out-j,j);
982                 }
983 #else
984                 while (len>=16) {
985                         size_t *out_t=(size_t *)out;
986                         const size_t *in_t=(const size_t *)in;
987
988                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
989                         ++ctr;
990                         if (is_endian.little)
991                                 PUTU32(ctx->Yi.c+12,ctr);
992                         else
993                                 ctx->Yi.d[3] = ctr;
994                         for (i=0; i<16/sizeof(size_t); ++i)
995                                 ctx->Xi.t[i] ^=
996                                 out_t[i] = in_t[i]^ctx->EKi.t[i];
997                         GCM_MUL(ctx,Xi);
998                         out += 16;
999                         in  += 16;
1000                         len -= 16;
1001                 }
1002 #endif
1003                 if (len) {
1004                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1005                         ++ctr;
1006                         if (is_endian.little)
1007                                 PUTU32(ctx->Yi.c+12,ctr);
1008                         else
1009                                 ctx->Yi.d[3] = ctr;
1010                         while (len--) {
1011                                 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1012                                 ++n;
1013                         }
1014                 }
1015
1016                 ctx->mres = n;
1017                 return 0;
1018         } while(0);
1019 #endif
1020         for (i=0;i<len;++i) {
1021                 if (n==0) {
1022                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1023                         ++ctr;
1024                         if (is_endian.little)
1025                                 PUTU32(ctx->Yi.c+12,ctr);
1026                         else
1027                                 ctx->Yi.d[3] = ctr;
1028                 }
1029                 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
1030                 n = (n+1)%16;
1031                 if (n==0)
1032                         GCM_MUL(ctx,Xi);
1033         }
1034
1035         ctx->mres = n;
1036         return 0;
1037 }
1038
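/*
 * Decrypt len bytes: the mirror image of CRYPTO_gcm128_encrypt, except
 * that the ciphertext (the input) is absorbed into the GHASH state
 * before being XOR-ed with the keystream. Same length limit and return
 * values as the encrypt path.
 */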
1039 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1040                 const unsigned char *in, unsigned char *out,
1041                 size_t len)
1042 {
1043         const union { long one; char little; } is_endian = {1};
1044         unsigned int n, ctr;
1045         size_t i;
1046         u64        mlen  = ctx->len.u[1];
1047         block128_f block = ctx->block;
1048         void      *key   = ctx->key;
1049 #ifdef GCM_FUNCREF_4BIT
1050         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
1051 # ifdef GHASH
1052         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1053                                 const u8 *inp,size_t len)       = ctx->ghash;
1054 # endif
1055 #endif
1056
1057         mlen += len;
1058         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1059                 return -1;
1060         ctx->len.u[1] = mlen;
1061
1062         if (ctx->ares) {
1063                 /* First call to decrypt finalizes GHASH(AAD) */
1064                 GCM_MUL(ctx,Xi);
1065                 ctx->ares = 0;
1066         }
1067
1068         if (is_endian.little)
1069                 ctr = GETU32(ctx->Yi.c+12);
1070         else
1071                 ctr = ctx->Yi.d[3];
1072
1073         n = ctx->mres;
1074 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1075         if (16%sizeof(size_t) == 0) do {        /* always true actually */
1076                 if (n) {
1077                         while (n && len) {
1078                                 u8 c = *(in++);
1079                                 *(out++) = c^ctx->EKi.c[n];
1080                                 ctx->Xi.c[n] ^= c;
1081                                 --len;
1082                                 n = (n+1)%16;
1083                         }
1084                         if (n==0) GCM_MUL (ctx,Xi);
1085                         else {
1086                                 ctx->mres = n;
1087                                 return 0;
1088                         }
1089                 }
1090 #if defined(STRICT_ALIGNMENT)
1091                 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1092                         break;
1093 #endif
1094 #if defined(GHASH) && defined(GHASH_CHUNK)
1095                 while (len>=GHASH_CHUNK) {
1096                     size_t j=GHASH_CHUNK;
1097
1098                     GHASH(ctx,in,GHASH_CHUNK);
1099                     while (j) {
1100                         size_t *out_t=(size_t *)out;
1101                         const size_t *in_t=(const size_t *)in;
1102
1103                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1104                         ++ctr;
1105                         if (is_endian.little)
1106                                 PUTU32(ctx->Yi.c+12,ctr);
1107                         else
1108                                 ctx->Yi.d[3] = ctr;
1109                         for (i=0; i<16/sizeof(size_t); ++i)
1110                                 out_t[i] = in_t[i]^ctx->EKi.t[i];
1111                         out += 16;
1112                         in  += 16;
1113                         j   -= 16;
1114                     }
1115                     len -= GHASH_CHUNK;
1116                 }
1117                 if ((i = (len&(size_t)-16))) {
1118                     GHASH(ctx,in,i);
1119                     while (len>=16) {
1120                         size_t *out_t=(size_t *)out;
1121                         const size_t *in_t=(const size_t *)in;
1122
1123                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1124                         ++ctr;
1125                         if (is_endian.little)
1126                                 PUTU32(ctx->Yi.c+12,ctr);
1127                         else
1128                                 ctx->Yi.d[3] = ctr;
1129                         for (i=0; i<16/sizeof(size_t); ++i)
1130                                 out_t[i] = in_t[i]^ctx->EKi.t[i];
1131                         out += 16;
1132                         in  += 16;
1133                         len -= 16;
1134                     }
1135                 }
1136 #else
1137                 while (len>=16) {
1138                         size_t *out_t=(size_t *)out;
1139                         const size_t *in_t=(const size_t *)in;
1140
1141                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1142                         ++ctr;
1143                         if (is_endian.little)
1144                                 PUTU32(ctx->Yi.c+12,ctr);
1145                         else
1146                                 ctx->Yi.d[3] = ctr;
1147                         for (i=0; i<16/sizeof(size_t); ++i) {
1148                                 size_t c = in[i];
1149                                 out[i] = c^ctx->EKi.t[i];
1150                                 ctx->Xi.t[i] ^= c;
1151                         }
1152                         GCM_MUL(ctx,Xi);
1153                         out += 16;
1154                         in  += 16;
1155                         len -= 16;
1156                 }
1157 #endif
1158                 if (len) {
1159                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1160                         ++ctr;
1161                         if (is_endian.little)
1162                                 PUTU32(ctx->Yi.c+12,ctr);
1163                         else
1164                                 ctx->Yi.d[3] = ctr;
1165                         while (len--) {
1166                                 u8 c = in[n];
1167                                 ctx->Xi.c[n] ^= c;
1168                                 out[n] = c^ctx->EKi.c[n];
1169                                 ++n;
1170                         }
1171                 }
1172
1173                 ctx->mres = n;
1174                 return 0;
1175         } while(0);
1176 #endif
1177         for (i=0;i<len;++i) {
1178                 u8 c;
1179                 if (n==0) {
1180                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1181                         ++ctr;
1182                         if (is_endian.little)
1183                                 PUTU32(ctx->Yi.c+12,ctr);
1184                         else
1185                                 ctx->Yi.d[3] = ctr;
1186                 }
1187                 c = in[i];
1188                 out[i] = c^ctx->EKi.c[n];
1189                 ctx->Xi.c[n] ^= c;
1190                 n = (n+1)%16;
1191                 if (n==0)
1192                         GCM_MUL(ctx,Xi);
1193         }
1194
1195         ctx->mres = n;
1196         return 0;
1197 }
1198
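/*
 * Same as CRYPTO_gcm128_encrypt, except that the keystream is produced
 * by a caller-supplied ctr128_f routine that encrypts several counter
 * blocks per call (for example a hardware-accelerated AES-CTR), so bulk
 * data can be processed in GHASH_CHUNK-sized strides.
 */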
1199 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1200                 const unsigned char *in, unsigned char *out,
1201                 size_t len, ctr128_f stream)
1202 {
1203         const union { long one; char little; } is_endian = {1};
1204         unsigned int n, ctr;
1205         size_t i;
1206         u64   mlen = ctx->len.u[1];
1207         void *key  = ctx->key;
1208 #ifdef GCM_FUNCREF_4BIT
1209         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
1210 # ifdef GHASH
1211         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1212                                 const u8 *inp,size_t len)       = ctx->ghash;
1213 # endif
1214 #endif
1215
1216         mlen += len;
1217         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1218                 return -1;
1219         ctx->len.u[1] = mlen;
1220
1221         if (ctx->ares) {
1222                 /* First call to encrypt finalizes GHASH(AAD) */
1223                 GCM_MUL(ctx,Xi);
1224                 ctx->ares = 0;
1225         }
1226
1227         if (is_endian.little)
1228                 ctr = GETU32(ctx->Yi.c+12);
1229         else
1230                 ctr = ctx->Yi.d[3];
1231
1232         n = ctx->mres;
1233         if (n) {
1234                 while (n && len) {
1235                         ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1236                         --len;
1237                         n = (n+1)%16;
1238                 }
1239                 if (n==0) GCM_MUL(ctx,Xi);
1240                 else {
1241                         ctx->mres = n;
1242                         return 0;
1243                 }
1244         }
1245 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1246         while (len>=GHASH_CHUNK) {
1247                 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1248                 ctr += GHASH_CHUNK/16;
1249                 if (is_endian.little)
1250                         PUTU32(ctx->Yi.c+12,ctr);
1251                 else
1252                         ctx->Yi.d[3] = ctr;
1253                 GHASH(ctx,out,GHASH_CHUNK);
1254                 out += GHASH_CHUNK;
1255                 in  += GHASH_CHUNK;
1256                 len -= GHASH_CHUNK;
1257         }
1258 #endif
1259         if ((i = (len&(size_t)-16))) {
1260                 size_t j=i/16;
1261
1262                 (*stream)(in,out,j,key,ctx->Yi.c);
1263                 ctr += (unsigned int)j;
1264                 if (is_endian.little)
1265                         PUTU32(ctx->Yi.c+12,ctr);
1266                 else
1267                         ctx->Yi.d[3] = ctr;
1268                 in  += i;
1269                 len -= i;
1270 #if defined(GHASH)
1271                 GHASH(ctx,out,i);
1272                 out += i;
1273 #else
1274                 while (j--) {
1275                         for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1276                         GCM_MUL(ctx,Xi);
1277                         out += 16;
1278                 }
1279 #endif
1280         }
1281         if (len) {
1282                 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1283                 ++ctr;
1284                 if (is_endian.little)
1285                         PUTU32(ctx->Yi.c+12,ctr);
1286                 else
1287                         ctx->Yi.d[3] = ctr;
1288                 while (len--) {
1289                         ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1290                         ++n;
1291                 }
1292         }
1293
1294         ctx->mres = n;
1295         return 0;
1296 }
1297
1298 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1299                 const unsigned char *in, unsigned char *out,
1300                 size_t len,ctr128_f stream)
1301 {
1302         const union { long one; char little; } is_endian = {1};
1303         unsigned int n, ctr;
1304         size_t i;
1305         u64   mlen = ctx->len.u[1];
1306         void *key  = ctx->key;
1307 #ifdef GCM_FUNCREF_4BIT
1308         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
1309 # ifdef GHASH
1310         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1311                                 const u8 *inp,size_t len)       = ctx->ghash;
1312 # endif
1313 #endif
1314
1315         mlen += len;
1316         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1317                 return -1;
1318         ctx->len.u[1] = mlen;
1319
1320         if (ctx->ares) {
1321                 /* First call to decrypt finalizes GHASH(AAD) */
1322                 GCM_MUL(ctx,Xi);
1323                 ctx->ares = 0;
1324         }
1325
1326         if (is_endian.little)
1327                 ctr = GETU32(ctx->Yi.c+12);
1328         else
1329                 ctr = ctx->Yi.d[3];
1330
1331         n = ctx->mres;
1332         if (n) {
1333                 while (n && len) {
1334                         u8 c = *(in++);
1335                         *(out++) = c^ctx->EKi.c[n];
1336                         ctx->Xi.c[n] ^= c;
1337                         --len;
1338                         n = (n+1)%16;
1339                 }
1340                 if (n==0) GCM_MUL (ctx,Xi);
1341                 else {
1342                         ctx->mres = n;
1343                         return 0;
1344                 }
1345         }
1346 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1347         while (len>=GHASH_CHUNK) {
1348                 GHASH(ctx,in,GHASH_CHUNK);
1349                 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1350                 ctr += GHASH_CHUNK/16;
1351                 if (is_endian.little)
1352                         PUTU32(ctx->Yi.c+12,ctr);
1353                 else
1354                         ctx->Yi.d[3] = ctr;
1355                 out += GHASH_CHUNK;
1356                 in  += GHASH_CHUNK;
1357                 len -= GHASH_CHUNK;
1358         }
1359 #endif
1360         if ((i = (len&(size_t)-16))) {
1361                 size_t j=i/16;
1362
1363 #if defined(GHASH)
1364                 GHASH(ctx,in,i);
1365 #else
1366                 while (j--) {
1367                         size_t k;
1368                         for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1369                         GCM_MUL(ctx,Xi);
1370                         in += 16;
1371                 }
1372                 j   = i/16;
1373                 in -= i;
1374 #endif
1375                 (*stream)(in,out,j,key,ctx->Yi.c);
1376                 ctr += (unsigned int)j;
1377                 if (is_endian.little)
1378                         PUTU32(ctx->Yi.c+12,ctr);
1379                 else
1380                         ctx->Yi.d[3] = ctr;
1381                 out += i;
1382                 in  += i;
1383                 len -= i;
1384         }
1385         if (len) {
1386                 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1387                 ++ctr;
1388                 if (is_endian.little)
1389                         PUTU32(ctx->Yi.c+12,ctr);
1390                 else
1391                         ctx->Yi.d[3] = ctr;
1392                 while (len--) {
1393                         u8 c = in[n];
1394                         ctx->Xi.c[n] ^= c;
1395                         out[n] = c^ctx->EKi.c[n];
1396                         ++n;
1397                 }
1398         }
1399
1400         ctx->mres = n;
1401         return 0;
1402 }
1403
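/*
 * Complete GHASH over the AAD and message bit lengths, XOR in EK0 to
 * form the authentication tag in ctx->Xi and, if a tag was supplied,
 * compare it against the first len bytes with memcmp(). Returns 0 if
 * the tags match, non-zero otherwise; -1 if tag is NULL or len exceeds
 * the 16-byte tag size.
 */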
1404 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1405                         size_t len)
1406 {
1407         const union { long one; char little; } is_endian = {1};
1408         u64 alen = ctx->len.u[0]<<3;
1409         u64 clen = ctx->len.u[1]<<3;
1410 #ifdef GCM_FUNCREF_4BIT
1411         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
1412 #endif
1413
1414         if (ctx->mres || ctx->ares)
1415                 GCM_MUL(ctx,Xi);
1416
1417         if (is_endian.little) {
1418 #ifdef BSWAP8
1419                 alen = BSWAP8(alen);
1420                 clen = BSWAP8(clen);
1421 #else
1422                 u8 *p = ctx->len.c;
1423
1424                 ctx->len.u[0] = alen;
1425                 ctx->len.u[1] = clen;
1426
1427                 alen = (u64)GETU32(p)  <<32|GETU32(p+4);
1428                 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1429 #endif
1430         }
1431
1432         ctx->Xi.u[0] ^= alen;
1433         ctx->Xi.u[1] ^= clen;
1434         GCM_MUL(ctx,Xi);
1435
1436         ctx->Xi.u[0] ^= ctx->EK0.u[0];
1437         ctx->Xi.u[1] ^= ctx->EK0.u[1];
1438
1439         if (tag && len<=sizeof(ctx->Xi))
1440                 return memcmp(ctx->Xi.c,tag,len);
1441         else
1442                 return -1;
1443 }
1444
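/*
 * CRYPTO_gcm128_tag finalizes the computation without comparing against a
 * caller-supplied tag and copies up to 16 bytes of the resulting tag into
 * |tag|; it is the counterpart of CRYPTO_gcm128_finish for the encryption
 * path.
 */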
1445 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1446 {
1447         CRYPTO_gcm128_finish(ctx, NULL, 0);
1448         memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
1449 }
1450
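/*
 * CRYPTO_gcm128_new allocates a context on the heap and initializes it with
 * the given key and block cipher; it returns NULL if the allocation fails.
 */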
1451 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1452 {
1453         GCM128_CONTEXT *ret;
1454
1455         if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1456                 CRYPTO_gcm128_init(ret,key,block);
1457
1458         return ret;
1459 }
1460
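/*
 * CRYPTO_gcm128_release wipes the context with OPENSSL_cleanse before
 * freeing it, so no key material is left behind on the heap.
 */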
1461 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1462 {
1463         if (ctx) {
1464                 OPENSSL_cleanse(ctx,sizeof(*ctx));
1465                 OPENSSL_free(ctx);
1466         }
1467 }
1468
1469 #if defined(SELFTEST)
1470 #include <stdio.h>
1471 #include <openssl/aes.h>
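/*
 * Illustrative sketch (not part of the upstream self-test): a minimal
 * AES-GCM "seal" helper showing the intended calling sequence of the
 * CRYPTO_gcm128_* API.  The helper name and signature are invented for this
 * example and it is not referenced by main() below.
 */
static int example_gcm_seal(AES_KEY *key,
                            const unsigned char *iv, size_t ivlen,
                            const unsigned char *aad, size_t aadlen,
                            const unsigned char *in, unsigned char *out,
                            size_t len, unsigned char tag[16])
{
        GCM128_CONTEXT gctx;

        /* bind the AES key schedule and its block function to the context */
        CRYPTO_gcm128_init(&gctx, key, (block128_f)AES_encrypt);
        /* set the IV (12 bytes is the recommended length) */
        CRYPTO_gcm128_setiv(&gctx, iv, ivlen);
        /* authenticated-only data must be fed before any plaintext */
        if (aadlen && CRYPTO_gcm128_aad(&gctx, aad, aadlen))
                return -1;
        if (len && CRYPTO_gcm128_encrypt(&gctx, in, out, len))
                return -1;
        /* extract the 16-byte authentication tag */
        CRYPTO_gcm128_tag(&gctx, tag, 16);
        return 0;
}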
1472
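/*
 * The vectors below follow the standard AES-GCM test cases (1-18, covering
 * 128-, 192- and 256-bit keys); cases 19 and 20 additionally exercise a
 * long AAD and a 64-byte IV respectively.
 */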
1473 /* Test Case 1 */
1474 static const u8 K1[16],
1475                 *P1=NULL,
1476                 *A1=NULL,
1477                 IV1[12],
1478                 *C1=NULL,
1479                 T1[]=  {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
1480
1481 /* Test Case 2 */
1482 #define K2 K1
1483 #define A2 A1
1484 #define IV2 IV1
1485 static const u8 P2[16],
1486                 C2[]=  {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1487                 T2[]=  {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1488
1489 /* Test Case 3 */
1490 #define A3 A2
1491 static const u8 K3[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1492                 P3[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1493                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1494                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1495                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1496                 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1497                 C3[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1498                         0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1499                         0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1500                         0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1501                 T3[]=  {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
1502
1503 /* Test Case 4 */
1504 #define K4 K3
1505 #define IV4 IV3
1506 static const u8 P4[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1507                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1508                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1509                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1510                 A4[]=  {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1511                         0xab,0xad,0xda,0xd2},
1512                 C4[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1513                         0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1514                         0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1515                         0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1516                 T4[]=  {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1517
1518 /* Test Case 5 */
1519 #define K5 K4
1520 #define P5 P4
1521 #define A5 A4
1522 static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1523                 C5[]=  {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1524                         0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1525                         0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1526                         0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1527                 T5[]=  {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
1528
1529 /* Test Case 6 */
1530 #define K6 K5
1531 #define P6 P5
1532 #define A6 A5
1533 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1534                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1535                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1536                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1537                 C6[]=  {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1538                         0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1539                         0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1540                         0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1541                 T6[]=  {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1542
1543 /* Test Case 7 */
1544 static const u8 K7[24],
1545                 *P7=NULL,
1546                 *A7=NULL,
1547                 IV7[12],
1548                 *C7=NULL,
1549                 T7[]=  {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1550
1551 /* Test Case 8 */
1552 #define K8 K7
1553 #define IV8 IV7
1554 #define A8 A7
1555 static const u8 P8[16],
1556                 C8[]=  {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1557                 T8[]=  {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1558
1559 /* Test Case 9 */
1560 #define A9 A8
1561 static const u8 K9[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1562                         0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1563                 P9[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1564                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1565                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1566                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1567                 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1568                 C9[]=  {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1569                         0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1570                         0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1571                         0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1572                 T9[]=  {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1573
1574 /* Test Case 10 */
1575 #define K10 K9
1576 #define IV10 IV9
1577 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1578                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1579                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1580                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1581                 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1582                         0xab,0xad,0xda,0xd2},
1583                 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1584                         0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1585                         0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1586                         0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1587                 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1588
1589 /* Test Case 11 */
1590 #define K11 K10
1591 #define P11 P10
1592 #define A11 A10
1593 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1594                 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1595                         0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1596                         0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1597                         0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1598                 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1599
1600 /* Test Case 12 */
1601 #define K12 K11
1602 #define P12 P11
1603 #define A12 A11
1604 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1605                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1606                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1607                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1608                 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1609                         0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1610                         0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1611                         0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1612                 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1613
1614 /* Test Case 13 */
1615 static const u8 K13[32],
1616                 *P13=NULL,
1617                 *A13=NULL,
1618                 IV13[12],
1619                 *C13=NULL,
1620                 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1621
1622 /* Test Case 14 */
1623 #define K14 K13
1624 #define A14 A13
1625 static const u8 P14[16],
1626                 IV14[12],
1627                 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1628                 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1629
1630 /* Test Case 15 */
1631 #define A15 A14
1632 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1633                         0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1634                 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1635                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1636                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1637                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1638                 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1639                 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1640                         0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1641                         0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1642                         0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1643                 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1644
1645 /* Test Case 16 */
1646 #define K16 K15
1647 #define IV16 IV15
1648 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1649                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1650                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1651                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1652                 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1653                         0xab,0xad,0xda,0xd2},
1654                 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1655                         0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1656                         0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1657                         0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1658                 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1659
1660 /* Test Case 17 */
1661 #define K17 K16
1662 #define P17 P16
1663 #define A17 A16
1664 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1665                 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1666                         0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1667                         0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1668                         0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1669                 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1670
1671 /* Test Case 18 */
1672 #define K18 K17
1673 #define P18 P17
1674 #define A18 A17
1675 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1676                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1677                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1678                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1679                 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1680                         0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1681                         0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1682                         0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1683                 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1684
1685 /* Test Case 19 */
1686 #define K19 K1
1687 #define P19 P1
1688 #define IV19 IV1
1689 #define C19 C1
1690 static const u8 A19[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1691                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1692                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1693                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
1694                         0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1695                         0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1696                         0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1697                         0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1698                 T19[]= {0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92};
1699
1700 /* Test Case 20 */
1701 #define K20 K1
1702 #define A20 A1
1703 static const u8 IV20[64]={0xff,0xff,0xff,0xff}, /* this results in 0xff in counter LSB */
1704                 P20[288],
1705                 C20[]= {0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
1706                         0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
1707                         0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
1708                         0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
1709                         0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
1710                         0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
1711                         0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
1712                         0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
1713                         0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
1714                         0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
1715                         0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
1716                         0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
1717                         0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
1718                         0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
1719                         0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
1720                         0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
1721                         0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
1722                         0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c},
1723                 T20[]= {0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f};
1724
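/*
 * TEST_CASE(n) runs vector n in both directions: it encrypts P##n and checks
 * the ciphertext and tag against C##n/T##n, then decrypts C##n and checks
 * the recovered plaintext and tag.  Any failure increments ret and prints a
 * diagnostic.
 */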
1725 #define TEST_CASE(n)    do {                                    \
1726         u8 out[sizeof(P##n)];                                   \
1727         AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);          \
1728         CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);  \
1729         CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));          \
1730         memset(out,0,sizeof(out));                              \
1731         if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));    \
1732         if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));     \
1733         if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||               \
1734             (C##n && memcmp(out,C##n,sizeof(out))))             \
1735                 ret++, printf ("encrypt test#%d failed.\n",n);  \
1736         CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));          \
1737         memset(out,0,sizeof(out));                              \
1738         if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));    \
1739         if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));     \
1740         if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||               \
1741             (P##n && memcmp(out,P##n,sizeof(out))))             \
1742                 ret++, printf ("decrypt test#%d failed.\n",n);  \
1743         } while(0)
1744
1745 int main(void)
1746 {
1747         GCM128_CONTEXT ctx;
1748         AES_KEY key;
1749         int ret=0;
1750
1751         TEST_CASE(1);
1752         TEST_CASE(2);
1753         TEST_CASE(3);
1754         TEST_CASE(4);
1755         TEST_CASE(5);
1756         TEST_CASE(6);
1757         TEST_CASE(7);
1758         TEST_CASE(8);
1759         TEST_CASE(9);
1760         TEST_CASE(10);
1761         TEST_CASE(11);
1762         TEST_CASE(12);
1763         TEST_CASE(13);
1764         TEST_CASE(14);
1765         TEST_CASE(15);
1766         TEST_CASE(16);
1767         TEST_CASE(17);
1768         TEST_CASE(18);
1769         TEST_CASE(19);
1770         TEST_CASE(20);
1771
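/*
 * Rough benchmark: measure cycles per byte over a 1KB buffer for GCM and for
 * plain CTR using OPENSSL_rdtsc() (their difference approximates the GHASH
 * overhead), and, when a GHASH routine is compiled in, time GHASH on its own.
 */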
1772 #ifdef OPENSSL_CPUID_OBJ
1773         {
1774         size_t start,gcm_t,ctr_t,OPENSSL_rdtsc();
1775         union { u64 u; u8 c[1024]; } buf;
1776         int i;
1777
1778         AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1779         CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1780         CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1781
1782         CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1783         start = OPENSSL_rdtsc();
1784         CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1785         gcm_t = OPENSSL_rdtsc() - start;
1786
1787         CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1788                         &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1789                         (block128_f)AES_encrypt);
1790         start = OPENSSL_rdtsc();
1791         CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1792                         &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1793                         (block128_f)AES_encrypt);
1794         ctr_t = OPENSSL_rdtsc() - start;
1795
1796         printf("%.2f-%.2f=%.2f\n",
1797                         gcm_t/(double)sizeof(buf),
1798                         ctr_t/(double)sizeof(buf),
1799                         (gcm_t-ctr_t)/(double)sizeof(buf));
1800 #ifdef GHASH
1801         {
1802         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1803                                 const u8 *inp,size_t len)       = ctx.ghash;
1804
1805         GHASH((&ctx),buf.c,sizeof(buf));
1806         start = OPENSSL_rdtsc();
1807         for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
1808         gcm_t = OPENSSL_rdtsc() - start;
1809         printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
1810         }
1811 #endif
1812         }
1813 #endif
1814
1815         return ret;
1816 }
1817 #endif