gcm128.c: commentary update.
[openssl.git] / crypto / modes / gcm128.c
1 /* ====================================================================
2  * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer. 
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  *
16  * 3. All advertising materials mentioning features or use of this
17  *    software must display the following acknowledgment:
18  *    "This product includes software developed by the OpenSSL Project
19  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20  *
21  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22  *    endorse or promote products derived from this software without
23  *    prior written permission. For written permission, please contact
24  *    openssl-core@openssl.org.
25  *
26  * 5. Products derived from this software may not be called "OpenSSL"
27  *    nor may "OpenSSL" appear in their names without prior written
28  *    permission of the OpenSSL Project.
29  *
30  * 6. Redistributions of any form whatsoever must retain the following
31  *    acknowledgment:
32  *    "This product includes software developed by the OpenSSL Project
33  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34  *
35  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
39  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46  * OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  */
49
50 #include "modes_lcl.h"
51 #include <string.h>
52
53 #ifndef MODES_DEBUG
54 # ifndef NDEBUG
55 #  define NDEBUG
56 # endif
57 #endif
58 #include <assert.h>
59
60 typedef struct { u64 hi,lo; } u128;
61
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/*
 * Redefine, because alignment is ensured: both helpers become a single
 * 32-bit access combined with BSWAP4.
 *
 * PUTU32 expands to an assignment; it is wrapped in parentheses so the
 * expansion stays one self-contained expression wherever it is used.
 */
#undef  GETU32
#define GETU32(p)       BSWAP4(*(const u32 *)(p))
#undef  PUTU32
#define PUTU32(p,v)     (*(u32 *)(p) = BSWAP4(v))
#endif
69
/* Place the 16-bit constant s in the 16 most significant bits of a size_t. */
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * Shift the 128-bit value V (an lvalue with u64 members hi and lo) right
 * by one bit. When the bit shifted out of V.lo was set, 0xe1 is XORed
 * into the most significant byte of V.hi; the mask is computed without
 * a branch. The sizeof(size_t) test only selects between a 64-bit and a
 * 32-bit way of building that mask.
 *
 * V is expanded several times, so it must be free of side effects. Every
 * use is parenthesized so that any lvalue expression can be passed.
 */
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-((V).lo&1)); \
                (V).lo  = ((V).hi<<63)|((V).lo>>1); \
                (V).hi  = ((V).hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)((V).lo&1)); \
                (V).lo  = ((V).hi<<63)|((V).lo>>1); \
                (V).hi  = ((V).hi>>1 )^((u64)T<<32); \
        } \
} while(0)
83
84 #ifdef  TABLE_BITS
85 #undef  TABLE_BITS
86 #endif
87 /*
88  * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
89  * never be set to 8. 8 is effectively reserved for testing purposes.
90  * TABLE_BITS>1 are lookup-table-driven implementations referred to as
91  * "Shoup's" in GCM specification. In other words OpenSSL does not cover
92  * whole spectrum of possible table driven implementations. Why? In
93  * non-"Shoup's" case memory access pattern is segmented in such manner,
94  * that it's trivial to see that cache timing information can reveal
95  * fair portion of intermediate hash value. Given that ciphertext is
96  * always available to an attacker, it's possible for them to attempt to
97  * deduce the secret parameter H and, if successful, tamper with messages
98  * [which is trivial in CTR mode]. In "Shoup's" case it's
99  * not as trivial, but there is no reason to believe that it's resistant
100  * to cache-timing attack. And the thing about "8-bit" implementation is
101  * that it consumes 16 (sixteen) times more memory, 4KB per individual
102  * key + 1KB shared. Well, on pros side it should be twice as fast as
103  * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
104  * was observed to run ~75% faster, closer to 100% for commercial
105  * compilers... Yet "4-bit" procedure is preferred, because it's
106  * believed to provide better security-performance balance and adequate
107  * all-round performance. "All-round" refers to things like:
108  *
109  * - shorter setup time effectively improves overall timing for
110  *   handling short messages;
111  * - larger table allocation can become unbearable because of VM
112  *   subsystem penalties (for example, on Windows a large enough free
113  *   results in VM working set trimming, meaning that a subsequent
114  *   malloc would immediately incur working set expansion);
115  * - larger table has larger cache footprint, which can affect
116  *   performance of other code paths (not necessarily even from same
117  *   thread in Hyper-Threading world);
118  */
119 #define TABLE_BITS 4
120
121 #if     TABLE_BITS==8
122
123 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
124 {
125         int  i, j;
126         u128 V;
127
128         Htable[0].hi = 0;
129         Htable[0].lo = 0;
130         V.hi = H[0];
131         V.lo = H[1];
132
133         for (Htable[128]=V, i=64; i>0; i>>=1) {
134                 REDUCE1BIT(V);
135                 Htable[i] = V;
136         }
137
138         for (i=2; i<256; i<<=1) {
139                 u128 *Hi = Htable+i, H0 = *Hi;
140                 for (j=1; j<i; ++j) {
141                         Hi[j].hi = H0.hi^Htable[j].hi;
142                         Hi[j].lo = H0.lo^Htable[j].lo;
143                 }
144         }
145 }
146
147 static void gcm_gmult_8bit(u64 Xi[2], u128 Htable[256])
148 {
149         u128 Z = { 0, 0};
150         const u8 *xi = (const u8 *)Xi+15;
151         size_t rem, n = *xi;
152         const union { long one; char little; } is_endian = {1};
153         static const size_t rem_8bit[256] = {
154                 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
155                 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
156                 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
157                 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
158                 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
159                 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
160                 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
161                 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
162                 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
163                 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
164                 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
165                 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
166                 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
167                 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
168                 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
169                 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
170                 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
171                 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
172                 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
173                 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
174                 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
175                 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
176                 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
177                 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
178                 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
179                 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
180                 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
181                 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
182                 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
183                 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
184                 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
185                 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
186                 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
187                 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
188                 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
189                 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
190                 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
191                 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
192                 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
193                 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
194                 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
195                 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
196                 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
197                 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
198                 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
199                 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
200                 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
201                 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
202                 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
203                 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
204                 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
205                 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
206                 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
207                 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
208                 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
209                 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
210                 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
211                 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
212                 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
213                 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
214                 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
215                 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
216                 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
217                 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
218
219         while (1) {
220                 Z.hi ^= Htable[n].hi;
221                 Z.lo ^= Htable[n].lo;
222
223                 if ((u8 *)Xi==xi)       break;
224
225                 n = *(--xi);
226
227                 rem  = (size_t)Z.lo&0xff;
228                 Z.lo = (Z.hi<<56)|(Z.lo>>8);
229                 Z.hi = (Z.hi>>8);
230                 if (sizeof(size_t)==8)
231                         Z.hi ^= rem_8bit[rem];
232                 else
233                         Z.hi ^= (u64)rem_8bit[rem]<<32;
234         }
235
236         if (is_endian.little) {
237 #ifdef BSWAP8
238                 Xi[0] = BSWAP8(Z.hi);
239                 Xi[1] = BSWAP8(Z.lo);
240 #else
241                 u8 *p = (u8 *)Xi;
242                 u32 v;
243                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
244                 v = (u32)(Z.hi);        PUTU32(p+4,v);
245                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
246                 v = (u32)(Z.lo);        PUTU32(p+12,v);
247 #endif
248         }
249         else {
250                 Xi[0] = Z.hi;
251                 Xi[1] = Z.lo;
252         }
253 }
254 #define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
255
256 #elif   TABLE_BITS==4
257
258 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
259 {
260         u128 V;
261 #if defined(OPENSSL_SMALL_FOOTPRINT)
262         int  i;
263 #endif
264
265         Htable[0].hi = 0;
266         Htable[0].lo = 0;
267         V.hi = H[0];
268         V.lo = H[1];
269
270 #if defined(OPENSSL_SMALL_FOOTPRINT)
271         for (Htable[8]=V, i=4; i>0; i>>=1) {
272                 REDUCE1BIT(V);
273                 Htable[i] = V;
274         }
275
276         for (i=2; i<16; i<<=1) {
277                 u128 *Hi = Htable+i;
278                 int   j;
279                 for (V=*Hi, j=1; j<i; ++j) {
280                         Hi[j].hi = V.hi^Htable[j].hi;
281                         Hi[j].lo = V.lo^Htable[j].lo;
282                 }
283         }
284 #else
285         Htable[8] = V;
286         REDUCE1BIT(V);
287         Htable[4] = V;
288         REDUCE1BIT(V);
289         Htable[2] = V;
290         REDUCE1BIT(V);
291         Htable[1] = V;
292         Htable[3].hi  = V.hi^Htable[2].hi, Htable[3].lo  = V.lo^Htable[2].lo;
293         V=Htable[4];
294         Htable[5].hi  = V.hi^Htable[1].hi, Htable[5].lo  = V.lo^Htable[1].lo;
295         Htable[6].hi  = V.hi^Htable[2].hi, Htable[6].lo  = V.lo^Htable[2].lo;
296         Htable[7].hi  = V.hi^Htable[3].hi, Htable[7].lo  = V.lo^Htable[3].lo;
297         V=Htable[8];
298         Htable[9].hi  = V.hi^Htable[1].hi, Htable[9].lo  = V.lo^Htable[1].lo;
299         Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
300         Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
301         Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
302         Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
303         Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
304         Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
305 #endif
306 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
307         /*
308          * ARM assembler expects specific dword order in Htable.
309          */
310         {
311         int j;
312         const union { long one; char little; } is_endian = {1};
313
314         if (is_endian.little)
315                 for (j=0;j<16;++j) {
316                         V = Htable[j];
317                         Htable[j].hi = V.lo;
318                         Htable[j].lo = V.hi;
319                 }
320         else
321                 for (j=0;j<16;++j) {
322                         V = Htable[j];
323                         Htable[j].hi = V.lo<<32|V.lo>>32;
324                         Htable[j].lo = V.hi<<32|V.hi>>32;
325                 }
326         }
327 #endif
328 }
329
330 #ifndef GHASH_ASM
331 static const size_t rem_4bit[16] = {
332         PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
333         PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
334         PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
335         PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
336
337 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
338 {
339         u128 Z;
340         int cnt = 15;
341         size_t rem, nlo, nhi;
342         const union { long one; char little; } is_endian = {1};
343
344         nlo  = ((const u8 *)Xi)[15];
345         nhi  = nlo>>4;
346         nlo &= 0xf;
347
348         Z.hi = Htable[nlo].hi;
349         Z.lo = Htable[nlo].lo;
350
351         while (1) {
352                 rem  = (size_t)Z.lo&0xf;
353                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
354                 Z.hi = (Z.hi>>4);
355                 if (sizeof(size_t)==8)
356                         Z.hi ^= rem_4bit[rem];
357                 else
358                         Z.hi ^= (u64)rem_4bit[rem]<<32;
359
360                 Z.hi ^= Htable[nhi].hi;
361                 Z.lo ^= Htable[nhi].lo;
362
363                 if (--cnt<0)            break;
364
365                 nlo  = ((const u8 *)Xi)[cnt];
366                 nhi  = nlo>>4;
367                 nlo &= 0xf;
368
369                 rem  = (size_t)Z.lo&0xf;
370                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
371                 Z.hi = (Z.hi>>4);
372                 if (sizeof(size_t)==8)
373                         Z.hi ^= rem_4bit[rem];
374                 else
375                         Z.hi ^= (u64)rem_4bit[rem]<<32;
376
377                 Z.hi ^= Htable[nlo].hi;
378                 Z.lo ^= Htable[nlo].lo;
379         }
380
381         if (is_endian.little) {
382 #ifdef BSWAP8
383                 Xi[0] = BSWAP8(Z.hi);
384                 Xi[1] = BSWAP8(Z.lo);
385 #else
386                 u8 *p = (u8 *)Xi;
387                 u32 v;
388                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
389                 v = (u32)(Z.hi);        PUTU32(p+4,v);
390                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
391                 v = (u32)(Z.lo);        PUTU32(p+12,v);
392 #endif
393         }
394         else {
395                 Xi[0] = Z.hi;
396                 Xi[1] = Z.lo;
397         }
398 }
399
400 #if !defined(OPENSSL_SMALL_FOOTPRINT)
401 /*
402  * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
403  * details... Compiler-generated code doesn't seem to give any
404  * performance improvement, at least not on x86[_64]. It's here
405  * mostly as reference and a placeholder for possible future
406  * non-trivial optimization[s]...
407  */
408 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
409                                 const u8 *inp,size_t len)
410 {
411     u128 Z;
412     int cnt;
413     size_t rem, nlo, nhi;
414     const union { long one; char little; } is_endian = {1};
415
416     do {
417         cnt  = 15;
418         nlo  = ((const u8 *)Xi)[15];
419         nlo ^= inp[15];
420         nhi  = nlo>>4;
421         nlo &= 0xf;
422
423         Z.hi = Htable[nlo].hi;
424         Z.lo = Htable[nlo].lo;
425
426         while (1) {
427                 rem  = (size_t)Z.lo&0xf;
428                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
429                 Z.hi = (Z.hi>>4);
430                 if (sizeof(size_t)==8)
431                         Z.hi ^= rem_4bit[rem];
432                 else
433                         Z.hi ^= (u64)rem_4bit[rem]<<32;
434
435                 Z.hi ^= Htable[nhi].hi;
436                 Z.lo ^= Htable[nhi].lo;
437
438                 if (--cnt<0)            break;
439
440                 nlo  = ((const u8 *)Xi)[cnt];
441                 nlo ^= inp[cnt];
442                 nhi  = nlo>>4;
443                 nlo &= 0xf;
444
445                 rem  = (size_t)Z.lo&0xf;
446                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
447                 Z.hi = (Z.hi>>4);
448                 if (sizeof(size_t)==8)
449                         Z.hi ^= rem_4bit[rem];
450                 else
451                         Z.hi ^= (u64)rem_4bit[rem]<<32;
452
453                 Z.hi ^= Htable[nlo].hi;
454                 Z.lo ^= Htable[nlo].lo;
455         }
456
457         if (is_endian.little) {
458 #ifdef BSWAP8
459                 Xi[0] = BSWAP8(Z.hi);
460                 Xi[1] = BSWAP8(Z.lo);
461 #else
462                 u8 *p = (u8 *)Xi;
463                 u32 v;
464                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
465                 v = (u32)(Z.hi);        PUTU32(p+4,v);
466                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
467                 v = (u32)(Z.lo);        PUTU32(p+12,v);
468 #endif
469         }
470         else {
471                 Xi[0] = Z.hi;
472                 Xi[1] = Z.lo;
473         }
474     } while (inp+=16, len-=16);
475 }
476 #endif
477 #else
478 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
479 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
480 #endif
481
482 #define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
483 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
484 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
485 /* GHASH_CHUNK is a "stride parameter" meant to mitigate the cache
486  * thrashing effect. In other words, the idea is to hash data while
487  * it's still in L1 cache after the encryption pass... */
488 #define GHASH_CHUNK       1024
489 #endif
490
491 #else   /* TABLE_BITS */
492
493 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
494 {
495         u128 V,Z = { 0,0 };
496         long X;
497         int  i,j;
498         const long *xi = (const long *)Xi;
499         const union { long one; char little; } is_endian = {1};
500
501         V.hi = H[0];    /* H is in host byte order, no byte swapping */
502         V.lo = H[1];
503
504         for (j=0; j<16/sizeof(long); ++j) {
505                 if (is_endian.little) {
506                         if (sizeof(long)==8) {
507 #ifdef BSWAP8
508                                 X = (long)(BSWAP8(xi[j]));
509 #else
510                                 const u8 *p = (const u8 *)(xi+j);
511                                 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
512 #endif
513                         }
514                         else {
515                                 const u8 *p = (const u8 *)(xi+j);
516                                 X = (long)GETU32(p);
517                         }
518                 }
519                 else
520                         X = xi[j];
521
522                 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
523                         u64 M = (u64)(X>>(8*sizeof(long)-1));
524                         Z.hi ^= V.hi&M;
525                         Z.lo ^= V.lo&M;
526
527                         REDUCE1BIT(V);
528                 }
529         }
530
531         if (is_endian.little) {
532 #ifdef BSWAP8
533                 Xi[0] = BSWAP8(Z.hi);
534                 Xi[1] = BSWAP8(Z.lo);
535 #else
536                 u8 *p = (u8 *)Xi;
537                 u32 v;
538                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
539                 v = (u32)(Z.hi);        PUTU32(p+4,v);
540                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
541                 v = (u32)(Z.lo);        PUTU32(p+12,v);
542 #endif
543         }
544         else {
545                 Xi[0] = Z.hi;
546                 Xi[1] = Z.lo;
547         }
548 }
549 #define GCM_MUL(ctx,Xi)   gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
550
551 #endif
552
553 struct gcm128_context {
554         /* Following 6 names follow names in GCM specification */
555         union { u64 u[2]; u32 d[4]; u8 c[16]; } Yi,EKi,EK0,
556                                                 Xi,H,len;
557         /* Pre-computed table used by gcm_gmult_* */
558 #if TABLE_BITS==8
559         u128 Htable[256];
560 #else
561         u128 Htable[16];
562         void (*gmult)(u64 Xi[2],const u128 Htable[16]);
563         void (*ghash)(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
564 #endif
565         unsigned int res, pad;
566         block128_f block;
567         void *key;
568 };
569
570 #if     TABLE_BITS==4 && defined(GHASH_ASM) && !defined(I386_ONLY) && \
571         (defined(__i386)        || defined(__i386__)    || \
572          defined(__x86_64)      || defined(__x86_64__)  || \
573          defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
574 # define GHASH_ASM_IAX
575 extern unsigned int OPENSSL_ia32cap_P[2];
576
577 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
578 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
579 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
580
581 # if    defined(__i386) || defined(__i386__) || defined(_M_IX86)
582 #  define GHASH_ASM_X86
583 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
584 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
585
586 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
587 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
588 # endif
589
590 # undef  GCM_MUL
591 # define GCM_MUL(ctx,Xi)   (*((ctx)->gmult))(ctx->Xi.u,ctx->Htable)
592 # undef  GHASH
593 # define GHASH(ctx,in,len) (*((ctx)->ghash))((ctx)->Xi.u,(ctx)->Htable,in,len)
594 #endif
595
596 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
597 {
598         const union { long one; char little; } is_endian = {1};
599
600         memset(ctx,0,sizeof(*ctx));
601         ctx->block = block;
602         ctx->key   = key;
603
604         (*block)(ctx->H.c,ctx->H.c,key);
605
606         if (is_endian.little) {
607                 /* H is stored in host byte order */
608 #ifdef BSWAP8
609                 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
610                 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
611 #else
612                 u8 *p = ctx->H.c;
613                 u64 hi,lo;
614                 hi = (u64)GETU32(p)  <<32|GETU32(p+4);
615                 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
616                 ctx->H.u[0] = hi;
617                 ctx->H.u[1] = lo;
618 #endif
619         }
620
621 #if     TABLE_BITS==8
622         gcm_init_8bit(ctx->Htable,ctx->H.u);
623 #elif   TABLE_BITS==4
624 # if    defined(GHASH_ASM_IAX)
625         if (OPENSSL_ia32cap_P[1]&(1<<1)) {
626                 gcm_init_clmul(ctx->Htable,ctx->H.u);
627                 ctx->gmult = gcm_gmult_clmul;
628                 ctx->ghash = gcm_ghash_clmul;
629                 return;
630         }
631         gcm_init_4bit(ctx->Htable,ctx->H.u);
632 #  if   defined(GHASH_ASM_X86)
633         if (OPENSSL_ia32cap_P[0]&(1<<23)) {
634                 ctx->gmult = gcm_gmult_4bit_mmx;
635                 ctx->ghash = gcm_ghash_4bit_mmx;
636         } else {
637                 ctx->gmult = gcm_gmult_4bit_x86;
638                 ctx->ghash = gcm_ghash_4bit_x86;
639         }
640 #  else
641         ctx->gmult = gcm_gmult_4bit;
642         ctx->ghash = gcm_ghash_4bit;
643 #  endif
644 # else
645         gcm_init_4bit(ctx->Htable,ctx->H.u);
646 # endif
647 #endif
648 }
649
650 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
651 {
652         const union { long one; char little; } is_endian = {1};
653         unsigned int ctr;
654
655         ctx->Yi.u[0]  = 0;
656         ctx->Yi.u[1]  = 0;
657         ctx->Xi.u[0]  = 0;
658         ctx->Xi.u[1]  = 0;
659         ctx->len.u[0] = 0;
660         ctx->len.u[1] = 0;
661         ctx->res = 0;
662
663         if (len==12) {
664                 memcpy(ctx->Yi.c,iv,12);
665                 ctx->Yi.c[15]=1;
666                 ctr=1;
667         }
668         else {
669                 size_t i;
670                 u64 len0 = len;
671
672                 while (len>=16) {
673                         for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
674                         GCM_MUL(ctx,Yi);
675                         iv += 16;
676                         len -= 16;
677                 }
678                 if (len) {
679                         for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
680                         GCM_MUL(ctx,Yi);
681                 }
682                 len0 <<= 3;
683                 if (is_endian.little) {
684 #ifdef BSWAP8
685                         ctx->Yi.u[1]  ^= BSWAP8(len0);
686 #else
687                         ctx->Yi.c[8]  ^= (u8)(len0>>56);
688                         ctx->Yi.c[9]  ^= (u8)(len0>>48);
689                         ctx->Yi.c[10] ^= (u8)(len0>>40);
690                         ctx->Yi.c[11] ^= (u8)(len0>>32);
691                         ctx->Yi.c[12] ^= (u8)(len0>>24);
692                         ctx->Yi.c[13] ^= (u8)(len0>>16);
693                         ctx->Yi.c[14] ^= (u8)(len0>>8);
694                         ctx->Yi.c[15] ^= (u8)(len0);
695 #endif
696                 }
697                 else
698                         ctx->Yi.u[1]  ^= len0;
699
700                 GCM_MUL(ctx,Yi);
701
702                 if (is_endian.little)
703                         ctr = GETU32(ctx->Yi.c+12);
704                 else
705                         ctr = ctx->Yi.d[3];
706         }
707
708         (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
709         ++ctr;
710         if (is_endian.little)
711                 PUTU32(ctx->Yi.c+12,ctr);
712         else
713                 ctx->Yi.d[3] = ctr;
714 }
715
716 void CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
717 {
718         size_t i;
719
720         ctx->len.u[0] += len;
721
722 #ifdef GHASH
723         if ((i = (len&(size_t)-16))) {
724                 GHASH(ctx,aad,i);
725                 aad += i;
726                 len -= i;
727         }
728 #else
729         while (len>=16) {
730                 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
731                 GCM_MUL(ctx,Xi);
732                 aad += 16;
733                 len -= 16;
734         }
735 #endif
736         if (len) {
737                 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
738                 GCM_MUL(ctx,Xi);
739         }
740 }
741
742 void CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
743                 const unsigned char *in, unsigned char *out,
744                 size_t len)
745 {
746         const union { long one; char little; } is_endian = {1};
747         unsigned int n, ctr;
748         size_t i;
749
750         ctx->len.u[1] += len;
751         n   = ctx->res;
752         if (is_endian.little)
753                 ctr = GETU32(ctx->Yi.c+12);
754         else
755                 ctr = ctx->Yi.d[3];
756
757 #if !defined(OPENSSL_SMALL_FOOTPRINT)
758         if (16%sizeof(size_t) == 0) do {        /* always true actually */
759                 if (n) {
760                         while (n && len) {
761                                 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
762                                 --len;
763                                 n = (n+1)%16;
764                         }
765                         if (n==0) GCM_MUL(ctx,Xi);
766                         else {
767                                 ctx->res = n;
768                                 return;
769                         }
770                 }
771 #if defined(STRICT_ALIGNMENT)
772                 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
773                         break;
774 #endif
775 #if defined(GHASH) && defined(GHASH_CHUNK)
776                 while (len>=GHASH_CHUNK) {
777                     size_t j=GHASH_CHUNK;
778
779                     while (j) {
780                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
781                         ++ctr;
782                         if (is_endian.little)
783                                 PUTU32(ctx->Yi.c+12,ctr);
784                         else
785                                 ctx->Yi.d[3] = ctr;
786                         for (i=0; i<16; i+=sizeof(size_t))
787                                 *(size_t *)(out+i) =
788                                 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
789                         out += 16;
790                         in  += 16;
791                         j   -= 16;
792                     }
793                     GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
794                     len -= GHASH_CHUNK;
795                 }
796                 if ((i = (len&(size_t)-16))) {
797                     size_t j=i;
798
799                     while (len>=16) {
800                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
801                         ++ctr;
802                         if (is_endian.little)
803                                 PUTU32(ctx->Yi.c+12,ctr);
804                         else
805                                 ctx->Yi.d[3] = ctr;
806                         for (i=0; i<16; i+=sizeof(size_t))
807                                 *(size_t *)(out+i) =
808                                 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
809                         out += 16;
810                         in  += 16;
811                         len -= 16;
812                     }
813                     GHASH(ctx,out-j,j);
814                 }
815 #else
816                 while (len>=16) {
817                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
818                         ++ctr;
819                         if (is_endian.little)
820                                 PUTU32(ctx->Yi.c+12,ctr);
821                         else
822                                 ctx->Yi.d[3] = ctr;
823                         for (i=0; i<16; i+=sizeof(size_t))
824                                 *(size_t *)(ctx->Xi.c+i) ^=
825                                 *(size_t *)(out+i) =
826                                 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
827                         GCM_MUL(ctx,Xi);
828                         out += 16;
829                         in  += 16;
830                         len -= 16;
831                 }
832 #endif
833                 if (len) {
834                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
835                         ++ctr;
836                         if (is_endian.little)
837                                 PUTU32(ctx->Yi.c+12,ctr);
838                         else
839                                 ctx->Yi.d[3] = ctr;
840                         while (len--) {
841                                 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
842                                 ++n;
843                         }
844                 }
845
846                 ctx->res = n;
847                 return;
848         } while(0);
849 #endif
850         for (i=0;i<len;++i) {
851                 if (n==0) {
852                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
853                         ++ctr;
854                         if (is_endian.little)
855                                 PUTU32(ctx->Yi.c+12,ctr);
856                         else
857                                 ctx->Yi.d[3] = ctr;
858                 }
859                 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
860                 n = (n+1)%16;
861                 if (n==0)
862                         GCM_MUL(ctx,Xi);
863         }
864
865         ctx->res = n;
866 }
867
868 void CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
869                 const unsigned char *in, unsigned char *out,
870                 size_t len)
871 {
872         const union { long one; char little; } is_endian = {1};
873         unsigned int n, ctr;
874         size_t i;
875
876         ctx->len.u[1] += len;
877         n   = ctx->res;
878         if (is_endian.little)
879                 ctr = GETU32(ctx->Yi.c+12);
880         else
881                 ctr = ctx->Yi.d[3];
882
883 #if !defined(OPENSSL_SMALL_FOOTPRINT)
884         if (16%sizeof(size_t) == 0) do {        /* always true actually */
885                 if (n) {
886                         while (n && len) {
887                                 u8 c = *(in++);
888                                 *(out++) = c^ctx->EKi.c[n];
889                                 ctx->Xi.c[n] ^= c;
890                                 --len;
891                                 n = (n+1)%16;
892                         }
893                         if (n==0) GCM_MUL (ctx,Xi);
894                         else {
895                                 ctx->res = n;
896                                 return;
897                         }
898                 }
899 #if defined(STRICT_ALIGNMENT)
900                 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
901                         break;
902 #endif
903 #if defined(GHASH) && defined(GHASH_CHUNK)
904                 while (len>=GHASH_CHUNK) {
905                     size_t j=GHASH_CHUNK;
906
907                     GHASH(ctx,in,GHASH_CHUNK);
908                     while (j) {
909                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
910                         ++ctr;
911                         if (is_endian.little)
912                                 PUTU32(ctx->Yi.c+12,ctr);
913                         else
914                                 ctx->Yi.d[3] = ctr;
915                         for (i=0; i<16; i+=sizeof(size_t))
916                                 *(size_t *)(out+i) =
917                                 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
918                         out += 16;
919                         in  += 16;
920                         j   -= 16;
921                     }
922                     len -= GHASH_CHUNK;
923                 }
924                 if ((i = (len&(size_t)-16))) {
925                     GHASH(ctx,in,i);
926                     while (len>=16) {
927                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
928                         ++ctr;
929                         if (is_endian.little)
930                                 PUTU32(ctx->Yi.c+12,ctr);
931                         else
932                                 ctx->Yi.d[3] = ctr;
933                         for (i=0; i<16; i+=sizeof(size_t))
934                                 *(size_t *)(out+i) =
935                                 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
936                         out += 16;
937                         in  += 16;
938                         len -= 16;
939                     }
940                 }
941 #else
942                 while (len>=16) {
943                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
944                         ++ctr;
945                         if (is_endian.little)
946                                 PUTU32(ctx->Yi.c+12,ctr);
947                         else
948                                 ctx->Yi.d[3] = ctr;
949                         for (i=0; i<16; i+=sizeof(size_t)) {
950                                 size_t c = *(size_t *)(in+i);
951                                 *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
952                                 *(size_t *)(ctx->Xi.c+i) ^= c;
953                         }
954                         GCM_MUL(ctx,Xi);
955                         out += 16;
956                         in  += 16;
957                         len -= 16;
958                 }
959 #endif
960                 if (len) {
961                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
962                         ++ctr;
963                         if (is_endian.little)
964                                 PUTU32(ctx->Yi.c+12,ctr);
965                         else
966                                 ctx->Yi.d[3] = ctr;
967                         while (len--) {
968                                 u8 c = in[n];
969                                 ctx->Xi.c[n] ^= c;
970                                 out[n] = c^ctx->EKi.c[n];
971                                 ++n;
972                         }
973                 }
974
975                 ctx->res = n;
976                 return;
977         } while(0);
978 #endif
979         for (i=0;i<len;++i) {
980                 u8 c;
981                 if (n==0) {
982                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
983                         ++ctr;
984                         if (is_endian.little)
985                                 PUTU32(ctx->Yi.c+12,ctr);
986                         else
987                                 ctx->Yi.d[3] = ctr;
988                 }
989                 c = in[i];
990                 out[i] ^= ctx->EKi.c[n];
991                 ctx->Xi.c[n] ^= c;
992                 n = (n+1)%16;
993                 if (n==0)
994                         GCM_MUL(ctx,Xi);
995         }
996
997         ctx->res = n;
998 }
999
1000 void CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx)
1001 {
1002         const union { long one; char little; } is_endian = {1};
1003         u64 alen = ctx->len.u[0]<<3;
1004         u64 clen = ctx->len.u[1]<<3;
1005
1006         if (ctx->res)
1007                 GCM_MUL(ctx,Xi);
1008
1009         if (is_endian.little) {
1010 #ifdef BSWAP8
1011                 alen = BSWAP8(alen);
1012                 clen = BSWAP8(clen);
1013 #else
1014                 u8 *p = ctx->len.c;
1015
1016                 ctx->len.u[0] = alen;
1017                 ctx->len.u[1] = clen;
1018
1019                 alen = (u64)GETU32(p)  <<32|GETU32(p+4);
1020                 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1021 #endif
1022         }
1023
1024         ctx->Xi.u[0] ^= alen;
1025         ctx->Xi.u[1] ^= clen;
1026         GCM_MUL(ctx,Xi);
1027
1028         ctx->Xi.u[0] ^= ctx->EK0.u[0];
1029         ctx->Xi.u[1] ^= ctx->EK0.u[1];
1030 }
1031
1032 #if defined(SELFTEST)
1033 #include <stdio.h>
1034 #include <openssl/aes.h>
1035
1036 /* Test Case 1: 16-byte all-zero key and 12-byte all-zero IV (neither has an initializer); P1, A1 and C1 are NULL, so TEST_CASE only compares the tag against T1. */
1037 static const u8 K1[16],
1038                 *P1=NULL,
1039                 *A1=NULL,
1040                 IV1[12],
1041                 *C1=NULL,
1042                 T1[]=  {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
1043
1044 /* Test Case 2: reuses the key, NULL AAD pointer and IV of case 1 with one 16-byte all-zero plaintext block; C2 is the expected ciphertext, T2 the expected tag. */
1045 #define K2 K1
1046 #define A2 A1
1047 #define IV2 IV1
1048 static const u8 P2[16],
1049                 C2[]=  {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1050                 T2[]=  {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1051
1052 /* Test Case 3: 16-byte key K3, 12-byte IV3, no AAD (A3 resolves to the NULL A1), 64-byte plaintext P3, i.e. four whole 16-byte blocks. */
1053 #define A3 A2
1054 static const u8 K3[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1055                 P3[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1056                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1057                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1058                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1059                 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1060                 C3[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1061                         0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1062                         0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1063                         0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1064                 T3[]=  {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
1065
1066 /* Test Case 4: key and IV of case 3, 20-byte AAD, 60-byte plaintext (three whole blocks plus 12 bytes). */
1067 #define K4 K3
1068 #define IV4 IV3
1069 static const u8 P4[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1070                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1071                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1072                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1073                 A4[]=  {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1074                         0xab,0xad,0xda,0xd2},
1075                 C4[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1076                         0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1077                         0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1078                         0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1079                 T4[]=  {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1080
1081 /* Test Case 5: key and plaintext of case 4, a 20-byte AAD with the same bytes as A4, and a short 8-byte IV. */
1082 #define K5 K4
1083 #define P5 P4
1084 static const u8 A5[]=  {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1085                         0xab,0xad,0xda,0xd2},
1086                 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1087                 C5[]=  {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1088                         0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1089                         0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1090                         0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1091                 T5[]=  {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
1092
1093 /* Test Case 6: key, plaintext and AAD of case 5 with a long 60-byte IV. */
1094 #define K6 K5
1095 #define P6 P5
1096 #define A6 A5
1097 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1098                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1099                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1100                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1101                 C6[]=  {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1102                         0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1103                         0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1104                         0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1105                 T6[]=  {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1106
1107 /* Test Case 7: 24-byte all-zero key and 12-byte all-zero IV (neither has an initializer); P7, A7 and C7 are NULL, so TEST_CASE only compares the tag against T7. */
1108 static const u8 K7[24],
1109                 *P7=NULL,
1110                 *A7=NULL,
1111                 IV7[12],
1112                 *C7=NULL,
1113                 T7[]=  {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1114
1115 /* Test Case 8: reuses the key, IV and NULL AAD pointer of case 7 with one 16-byte all-zero plaintext block; C8 is the expected ciphertext, T8 the expected tag. */
1116 #define K8 K7
1117 #define IV8 IV7
1118 #define A8 A7
1119 static const u8 P8[16],
1120                 C8[]=  {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1121                 T8[]=  {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1122
1123 /* Test Case 9: 24-byte key K9, 12-byte IV9, no AAD (A9 resolves to the NULL A7), 64-byte plaintext P9, i.e. four whole 16-byte blocks. */
1124 #define A9 A8
1125 static const u8 K9[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1126                         0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1127                 P9[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1128                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1129                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1130                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1131                 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1132                 C9[]=  {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1133                         0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1134                         0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1135                         0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1136                 T9[]=  {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1137
1138 /* Test Case 10: key and IV of case 9, 20-byte AAD, 60-byte plaintext (three whole blocks plus 12 bytes). */
1139 #define K10 K9
1140 #define IV10 IV9
1141 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1142                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1143                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1144                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1145                 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1146                         0xab,0xad,0xda,0xd2},
1147                 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1148                         0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1149                         0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1150                         0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1151                 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1152
1153 /* Test Case 11: key, plaintext and AAD of case 10 with a short 8-byte IV. */
1154 #define K11 K10
1155 #define P11 P10
1156 #define A11 A10
1157 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1158                 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1159                         0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1160                         0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1161                         0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1162                 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1163
1164 /* Test Case 12: key, plaintext and AAD of case 11 with a long 60-byte IV. */
1165 #define K12 K11
1166 #define P12 P11
1167 #define A12 A11
1168 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1169                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1170                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1171                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1172                 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1173                         0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1174                         0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1175                         0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1176                 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1177
1178 /* Test Case 13: 32-byte all-zero key and 12-byte all-zero IV (neither has an initializer); P13, A13 and C13 are NULL, so TEST_CASE only compares the tag against T13. */
1179 static const u8 K13[32],
1180                 *P13=NULL,
1181                 *A13=NULL,
1182                 IV13[12],
1183                 *C13=NULL,
1184                 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1185
1186 /* Test Case 14: key and NULL AAD pointer of case 13, its own 12-byte all-zero IV14, and one 16-byte all-zero plaintext block; C14 is the expected ciphertext, T14 the expected tag. */
1187 #define K14 K13
1188 #define A14 A13
1189 static const u8 P14[16],
1190                 IV14[12],
1191                 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1192                 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1193
1194 /* Test Case 15: 32-byte key K15, 12-byte IV15, no AAD (A15 resolves to the NULL A13), 64-byte plaintext P15, i.e. four whole 16-byte blocks. */
1195 #define A15 A14
1196 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1197                         0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1198                 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1199                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1200                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1201                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1202                 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1203                 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1204                         0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1205                         0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1206                         0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1207                 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1208
1209 /* Test Case 16: key and IV of case 15, 20-byte AAD, 60-byte plaintext (three whole blocks plus 12 bytes). */
1210 #define K16 K15
1211 #define IV16 IV15
1212 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1213                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1214                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1215                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1216                 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1217                         0xab,0xad,0xda,0xd2},
1218                 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1219                         0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1220                         0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1221                         0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1222                 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1223
1224 /* Test Case 17: key, plaintext and AAD of case 16 with a short 8-byte IV. */
1225 #define K17 K16
1226 #define P17 P16
1227 #define A17 A16
1228 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1229                 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1230                         0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1231                         0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1232                         0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1233                 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1234
1235 /* Test Case 18: key, plaintext and AAD of case 17 with a long 60-byte IV. */
1236 #define K18 K17
1237 #define P18 P17
1238 #define A18 A17
1239 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1240                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1241                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1242                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1243                 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1244                         0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1245                         0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1246                         0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1247                 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1248
/*
 * TEST_CASE(n) runs vector set n (K##n, IV##n, A##n, P##n, C##n, T##n)
 * through CRYPTO_gcm128_encrypt and then CRYPTO_gcm128_decrypt.
 *
 * It expects `ctx` (GCM128_CONTEXT), `key` (AES_KEY) and `ret` (int failure
 * counter) to be in scope at the point of use.  The AES key size is taken
 * from sizeof(K##n)*8.  A NULL A##n, P##n or C##n skips the corresponding
 * aad/encrypt/decrypt call and the corresponding buffer comparison, so
 * vector sets without data check the tag only.  After each direction the
 * first 16 bytes of ctx.Xi are compared with T##n.
 *
 * `out` is cleared between the two directions.  It still holds the
 * ciphertext produced by the encrypt half, and a decrypt that merely XOR-ed
 * key stream into the output buffer would otherwise pass unnoticed.
 */
#define TEST_CASE(n)    do {                                    \
        u8 out[sizeof(P##n)];                                   \
        AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);          \
        CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);  \
        CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));          \
        if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));    \
        if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));     \
        CRYPTO_gcm128_finish(&ctx);                             \
        if (memcmp(ctx.Xi.c,T##n,16) || (C##n && memcmp(out,C##n,sizeof(out)))) \
                ret++, printf ("encrypt test#%d failed.\n",n);\
        memset(out,0,sizeof(out));                              \
        CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));          \
        if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));    \
        if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));     \
        CRYPTO_gcm128_finish(&ctx);                             \
        if (memcmp(ctx.Xi.c,T##n,16) || (P##n && memcmp(out,P##n,sizeof(out)))) \
                ret++, printf ("decrypt test#%d failed.\n",n);\
        } while(0)
1266
1267 int main()
1268 {
1269         GCM128_CONTEXT ctx;
1270         AES_KEY key;
1271         int ret=0;
1272
1273         TEST_CASE(1);
1274         TEST_CASE(2);
1275         TEST_CASE(3);
1276         TEST_CASE(4);
1277         TEST_CASE(5);
1278         TEST_CASE(6);
1279         TEST_CASE(7);
1280         TEST_CASE(8);
1281         TEST_CASE(9);
1282         TEST_CASE(10);
1283         TEST_CASE(11);
1284         TEST_CASE(12);
1285         TEST_CASE(13);
1286         TEST_CASE(14);
1287         TEST_CASE(15);
1288         TEST_CASE(16);
1289         TEST_CASE(17);
1290         TEST_CASE(18);
1291
1292 #ifdef OPENSSL_CPUID_OBJ
1293         {
1294         size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1295         union { u64 u; u8 c[1024]; } buf;
1296         int i;
1297
1298         AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1299         CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1300         CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1301
1302         CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1303         start = OPENSSL_rdtsc();
1304         CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1305         gcm_t = OPENSSL_rdtsc() - start;
1306
1307         CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1308                         &key,ctx.Yi.c,ctx.EKi.c,&ctx.res,
1309                         (block128_f)AES_encrypt);
1310         start = OPENSSL_rdtsc();
1311         CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1312                         &key,ctx.Yi.c,ctx.EKi.c,&ctx.res,
1313                         (block128_f)AES_encrypt);
1314         ctr_t = OPENSSL_rdtsc() - start;
1315
1316         printf("%.2f-%.2f=%.2f\n",
1317                         gcm_t/(double)sizeof(buf),
1318                         ctr_t/(double)sizeof(buf),
1319                         (gcm_t-ctr_t)/(double)sizeof(buf));
1320 #ifdef GHASH
1321         GHASH(&ctx,buf.c,sizeof(buf));
1322         start = OPENSSL_rdtsc();
1323         for (i=0;i<100;++i) GHASH(&ctx,buf.c,sizeof(buf));
1324         gcm_t = OPENSSL_rdtsc() - start;
1325         printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
1326 #endif
1327         }
1328 #endif
1329
1330         return ret;
1331 }
1332 #endif