8a48e90ac548d408b2e50f4cd327fe31afe3ce43
[openssl.git] / crypto / modes / gcm128.c
1 /* ====================================================================
2  * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer. 
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  *
16  * 3. All advertising materials mentioning features or use of this
17  *    software must display the following acknowledgment:
18  *    "This product includes software developed by the OpenSSL Project
19  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20  *
21  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22  *    endorse or promote products derived from this software without
23  *    prior written permission. For written permission, please contact
24  *    openssl-core@openssl.org.
25  *
26  * 5. Products derived from this software may not be called "OpenSSL"
27  *    nor may "OpenSSL" appear in their names without prior written
28  *    permission of the OpenSSL Project.
29  *
30  * 6. Redistributions of any form whatsoever must retain the following
31  *    acknowledgment:
32  *    "This product includes software developed by the OpenSSL Project
33  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34  *
35  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
39  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46  * OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  */
49
50 #define OPENSSL_FIPSAPI
51
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
54 #include <string.h>
55
56 #ifndef MODES_DEBUG
57 # ifndef NDEBUG
58 #  define NDEBUG
59 # endif
60 #endif
61 #include <assert.h>
62
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/*
 * Redefine GETU32/PUTU32 as one 32-bit load/store combined with BSWAP4,
 * because alignment is ensured.
 *
 * PUTU32 expands to an assignment, so the whole expansion is wrapped in
 * parentheses: without them the macro mis-parses when it is used as an
 * operand (for instance as an arm of ?:).
 */
#undef  GETU32
#define GETU32(p)       BSWAP4(*(const u32 *)(p))
#undef  PUTU32
#define PUTU32(p,v)     (*(u32 *)(p) = BSWAP4(v))
#endif
70
/*
 * PACK(s) places the 16-bit constant s in the topmost 16 bits of a size_t.
 */
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))

/*
 * REDUCE1BIT(V) shifts the 128-bit value V (members .hi and .lo, 64 bits
 * each) right by one bit and, when the bit shifted out of V.lo was set,
 * XORs 0xe1 into the top byte of V.hi.
 *
 * V must be a modifiable lvalue; it is evaluated several times, so it
 * must not have side effects.  Every use of the parameter is
 * parenthesized so that expressions such as *ptr or arr[i] can be
 * passed, not just plain identifiers.
 *
 * The two branches compute the same result; the second one builds the
 * mask with 32-bit arithmetic for targets where size_t is not 64 bits.
 */
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-((V).lo&1)); \
                (V).lo  = ((V).hi<<63)|((V).lo>>1); \
                (V).hi  = ((V).hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)((V).lo&1)); \
                (V).lo  = ((V).hi<<63)|((V).lo>>1); \
                (V).hi  = ((V).hi>>1 )^((u64)T<<32); \
        } \
} while(0)
84
85 #if     TABLE_BITS==8
86
87 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
88 {
89         int  i, j;
90         u128 V;
91
92         Htable[0].hi = 0;
93         Htable[0].lo = 0;
94         V.hi = H[0];
95         V.lo = H[1];
96
97         for (Htable[128]=V, i=64; i>0; i>>=1) {
98                 REDUCE1BIT(V);
99                 Htable[i] = V;
100         }
101
102         for (i=2; i<256; i<<=1) {
103                 u128 *Hi = Htable+i, H0 = *Hi;
104                 for (j=1; j<i; ++j) {
105                         Hi[j].hi = H0.hi^Htable[j].hi;
106                         Hi[j].lo = H0.lo^Htable[j].lo;
107                 }
108         }
109 }
110
111 static void gcm_gmult_8bit(u64 Xi[2], u128 Htable[256])
112 {
113         u128 Z = { 0, 0};
114         const u8 *xi = (const u8 *)Xi+15;
115         size_t rem, n = *xi;
116         const union { long one; char little; } is_endian = {1};
117         static const size_t rem_8bit[256] = {
118                 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
119                 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
120                 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
121                 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
122                 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
123                 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
124                 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
125                 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
126                 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
127                 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
128                 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
129                 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
130                 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
131                 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
132                 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
133                 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
134                 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
135                 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
136                 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
137                 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
138                 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
139                 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
140                 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
141                 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
142                 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
143                 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
144                 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
145                 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
146                 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
147                 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
148                 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
149                 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
150                 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
151                 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
152                 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
153                 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
154                 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
155                 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
156                 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
157                 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
158                 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
159                 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
160                 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
161                 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
162                 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
163                 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
164                 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
165                 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
166                 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
167                 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
168                 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
169                 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
170                 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
171                 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
172                 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
173                 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
174                 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
175                 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
176                 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
177                 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
178                 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
179                 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
180                 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
181                 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
182
183         while (1) {
184                 Z.hi ^= Htable[n].hi;
185                 Z.lo ^= Htable[n].lo;
186
187                 if ((u8 *)Xi==xi)       break;
188
189                 n = *(--xi);
190
191                 rem  = (size_t)Z.lo&0xff;
192                 Z.lo = (Z.hi<<56)|(Z.lo>>8);
193                 Z.hi = (Z.hi>>8);
194                 if (sizeof(size_t)==8)
195                         Z.hi ^= rem_8bit[rem];
196                 else
197                         Z.hi ^= (u64)rem_8bit[rem]<<32;
198         }
199
200         if (is_endian.little) {
201 #ifdef BSWAP8
202                 Xi[0] = BSWAP8(Z.hi);
203                 Xi[1] = BSWAP8(Z.lo);
204 #else
205                 u8 *p = (u8 *)Xi;
206                 u32 v;
207                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
208                 v = (u32)(Z.hi);        PUTU32(p+4,v);
209                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
210                 v = (u32)(Z.lo);        PUTU32(p+12,v);
211 #endif
212         }
213         else {
214                 Xi[0] = Z.hi;
215                 Xi[1] = Z.lo;
216         }
217 }
218 #define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
219
220 #elif   TABLE_BITS==4
221
222 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
223 {
224         u128 V;
225 #if defined(OPENSSL_SMALL_FOOTPRINT)
226         int  i;
227 #endif
228
229         Htable[0].hi = 0;
230         Htable[0].lo = 0;
231         V.hi = H[0];
232         V.lo = H[1];
233
234 #if defined(OPENSSL_SMALL_FOOTPRINT)
235         for (Htable[8]=V, i=4; i>0; i>>=1) {
236                 REDUCE1BIT(V);
237                 Htable[i] = V;
238         }
239
240         for (i=2; i<16; i<<=1) {
241                 u128 *Hi = Htable+i;
242                 int   j;
243                 for (V=*Hi, j=1; j<i; ++j) {
244                         Hi[j].hi = V.hi^Htable[j].hi;
245                         Hi[j].lo = V.lo^Htable[j].lo;
246                 }
247         }
248 #else
249         Htable[8] = V;
250         REDUCE1BIT(V);
251         Htable[4] = V;
252         REDUCE1BIT(V);
253         Htable[2] = V;
254         REDUCE1BIT(V);
255         Htable[1] = V;
256         Htable[3].hi  = V.hi^Htable[2].hi, Htable[3].lo  = V.lo^Htable[2].lo;
257         V=Htable[4];
258         Htable[5].hi  = V.hi^Htable[1].hi, Htable[5].lo  = V.lo^Htable[1].lo;
259         Htable[6].hi  = V.hi^Htable[2].hi, Htable[6].lo  = V.lo^Htable[2].lo;
260         Htable[7].hi  = V.hi^Htable[3].hi, Htable[7].lo  = V.lo^Htable[3].lo;
261         V=Htable[8];
262         Htable[9].hi  = V.hi^Htable[1].hi, Htable[9].lo  = V.lo^Htable[1].lo;
263         Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
264         Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
265         Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
266         Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
267         Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
268         Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
269 #endif
270 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
271         /*
272          * ARM assembler expects specific dword order in Htable.
273          */
274         {
275         int j;
276         const union { long one; char little; } is_endian = {1};
277
278         if (is_endian.little)
279                 for (j=0;j<16;++j) {
280                         V = Htable[j];
281                         Htable[j].hi = V.lo;
282                         Htable[j].lo = V.hi;
283                 }
284         else
285                 for (j=0;j<16;++j) {
286                         V = Htable[j];
287                         Htable[j].hi = V.lo<<32|V.lo>>32;
288                         Htable[j].lo = V.hi<<32|V.hi>>32;
289                 }
290         }
291 #endif
292 }
293
294 #ifndef GHASH_ASM
295 static const size_t rem_4bit[16] = {
296         PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
297         PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
298         PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
299         PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
300
301 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
302 {
303         u128 Z;
304         int cnt = 15;
305         size_t rem, nlo, nhi;
306         const union { long one; char little; } is_endian = {1};
307
308         nlo  = ((const u8 *)Xi)[15];
309         nhi  = nlo>>4;
310         nlo &= 0xf;
311
312         Z.hi = Htable[nlo].hi;
313         Z.lo = Htable[nlo].lo;
314
315         while (1) {
316                 rem  = (size_t)Z.lo&0xf;
317                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
318                 Z.hi = (Z.hi>>4);
319                 if (sizeof(size_t)==8)
320                         Z.hi ^= rem_4bit[rem];
321                 else
322                         Z.hi ^= (u64)rem_4bit[rem]<<32;
323
324                 Z.hi ^= Htable[nhi].hi;
325                 Z.lo ^= Htable[nhi].lo;
326
327                 if (--cnt<0)            break;
328
329                 nlo  = ((const u8 *)Xi)[cnt];
330                 nhi  = nlo>>4;
331                 nlo &= 0xf;
332
333                 rem  = (size_t)Z.lo&0xf;
334                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
335                 Z.hi = (Z.hi>>4);
336                 if (sizeof(size_t)==8)
337                         Z.hi ^= rem_4bit[rem];
338                 else
339                         Z.hi ^= (u64)rem_4bit[rem]<<32;
340
341                 Z.hi ^= Htable[nlo].hi;
342                 Z.lo ^= Htable[nlo].lo;
343         }
344
345         if (is_endian.little) {
346 #ifdef BSWAP8
347                 Xi[0] = BSWAP8(Z.hi);
348                 Xi[1] = BSWAP8(Z.lo);
349 #else
350                 u8 *p = (u8 *)Xi;
351                 u32 v;
352                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
353                 v = (u32)(Z.hi);        PUTU32(p+4,v);
354                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
355                 v = (u32)(Z.lo);        PUTU32(p+12,v);
356 #endif
357         }
358         else {
359                 Xi[0] = Z.hi;
360                 Xi[1] = Z.lo;
361         }
362 }
363
364 #if !defined(OPENSSL_SMALL_FOOTPRINT)
365 /*
366  * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
367  * details... Compiler-generated code doesn't seem to give any
368  * performance improvement, at least not on x86[_64]. It's here
369  * mostly as reference and a placeholder for possible future
370  * non-trivial optimization[s]...
371  */
/*
 * gcm_ghash_4bit: fold len bytes of inp into the running hash Xi.
 *
 *   Xi      16-byte hash state, updated in place after every block
 *   Htable  16-entry table built by gcm_init_4bit
 *   inp     input data, consumed 16 bytes per iteration
 *   len     number of input bytes
 *
 * For each 16-byte block the input bytes are XORed with the matching
 * bytes of Xi while the nibbles are extracted, and the result is
 * multiplied through the table exactly as gcm_gmult_4bit does.
 *
 * The outer loop is a do/while that runs until "len -= 16" reaches
 * zero, so at least one block is always processed and len has to be a
 * non-zero multiple of 16; any other value makes len wrap around.
 */
372 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
373                                 const u8 *inp,size_t len)
374 {
375     u128 Z;
376     int cnt;
377     size_t rem, nlo, nhi;
    /* run-time endianness probe: .little reads the first byte of the long 1 */
378     const union { long one; char little; } is_endian = {1};
379
    /* "#if 1" selects the straightforward variant; the #else part is compiled out */
380 #if 1
381     do {
382         cnt  = 15;
        /* last byte of (Xi ^ block) split into its two nibbles */
383         nlo  = ((const u8 *)Xi)[15];
384         nlo ^= inp[15];
385         nhi  = nlo>>4;
386         nlo &= 0xf;
387
388         Z.hi = Htable[nlo].hi;
389         Z.lo = Htable[nlo].lo;
390
391         while (1) {
                /* shift Z right by 4 bits; the lost nibble picks a rem_4bit correction */
392                 rem  = (size_t)Z.lo&0xf;
393                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
394                 Z.hi = (Z.hi>>4);
395                 if (sizeof(size_t)==8)
396                         Z.hi ^= rem_4bit[rem];
397                 else
398                         Z.hi ^= (u64)rem_4bit[rem]<<32;
399
400                 Z.hi ^= Htable[nhi].hi;
401                 Z.lo ^= Htable[nhi].lo;
402
                /* leaves the loop once byte 0 has been fully processed */
403                 if (--cnt<0)            break;
404
                /* next byte of (Xi ^ block), moving towards index 0 */
405                 nlo  = ((const u8 *)Xi)[cnt];
406                 nlo ^= inp[cnt];
407                 nhi  = nlo>>4;
408                 nlo &= 0xf;
409
410                 rem  = (size_t)Z.lo&0xf;
411                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
412                 Z.hi = (Z.hi>>4);
413                 if (sizeof(size_t)==8)
414                         Z.hi ^= rem_4bit[rem];
415                 else
416                         Z.hi ^= (u64)rem_4bit[rem]<<32;
417
418                 Z.hi ^= Htable[nlo].hi;
419                 Z.lo ^= Htable[nlo].lo;
420         }
421 #else
422     /*
423      * Extra 256+16 bytes per-key plus 512 bytes shared tables
424      * [should] give ~50% improvement... One could have PACK()-ed
425      * the rem_8bit even here, but the priority is to minimize
426      * cache footprint...
427      */ 
428     u128 Hshr4[16];     /* Htable shifted right by 4 bits */
429     u8   Hshl4[16];     /* Htable shifted left  by 4 bits */
430     static const unsigned short rem_8bit[256] = {
431         0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
432         0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
433         0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
434         0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
435         0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
436         0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
437         0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
438         0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
439         0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
440         0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
441         0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
442         0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
443         0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
444         0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
445         0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
446         0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
447         0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
448         0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
449         0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
450         0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
451         0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
452         0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
453         0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
454         0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
455         0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
456         0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
457         0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
458         0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
459         0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
460         0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
461         0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
462         0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
463     /*
464      * This pre-processing phase slows down procedure by approximately
465      * same time as it makes each loop spin faster. In other words
466      * single block performance is approximately same as straightforward
467      * "4-bit" implementation, and then it goes only faster...
468      */
469     for (cnt=0; cnt<16; ++cnt) {
470         Z.hi = Htable[cnt].hi;
471         Z.lo = Htable[cnt].lo;
472         Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
473         Hshr4[cnt].hi = (Z.hi>>4);
474         Hshl4[cnt]    = (u8)(Z.lo<<4);
475     }
476
477     do {
        /* bytes 15..1: one 8-bit shift per byte, both nibbles handled per pass */
478         for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
479                 nlo  = ((const u8 *)Xi)[cnt];
480                 nlo ^= inp[cnt];
481                 nhi  = nlo>>4;
482                 nlo &= 0xf;
483
484                 Z.hi ^= Htable[nlo].hi;
485                 Z.lo ^= Htable[nlo].lo;
486
487                 rem = (size_t)Z.lo&0xff;
488
489                 Z.lo = (Z.hi<<56)|(Z.lo>>8);
490                 Z.hi = (Z.hi>>8);
491
492                 Z.hi ^= Hshr4[nhi].hi;
493                 Z.lo ^= Hshr4[nhi].lo;
494                 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
495         }
496
        /* byte 0 is handled separately with a single 4-bit shift */
497         nlo  = ((const u8 *)Xi)[0];
498         nlo ^= inp[0];
499         nhi  = nlo>>4;
500         nlo &= 0xf;
501
502         Z.hi ^= Htable[nlo].hi;
503         Z.lo ^= Htable[nlo].lo;
504
505         rem = (size_t)Z.lo&0xf;
506
507         Z.lo = (Z.hi<<60)|(Z.lo>>4);
508         Z.hi = (Z.hi>>4);
509
510         Z.hi ^= Htable[nhi].hi;
511         Z.lo ^= Htable[nhi].lo;
512         Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
513 #endif
514
        /* common tail of both variants: store Z into Xi, most-significant byte first */
515         if (is_endian.little) {
516 #ifdef BSWAP8
517                 Xi[0] = BSWAP8(Z.hi);
518                 Xi[1] = BSWAP8(Z.lo);
519 #else
520                 u8 *p = (u8 *)Xi;
521                 u32 v;
522                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
523                 v = (u32)(Z.hi);        PUTU32(p+4,v);
524                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
525                 v = (u32)(Z.lo);        PUTU32(p+12,v);
526 #endif
527         }
528         else {
529                 Xi[0] = Z.hi;
530                 Xi[1] = Z.lo;
531         }
    /* advance to the next block; the loop ends only when len reaches exactly 0 */
532     } while (inp+=16, len-=16);
533 }
534 #endif
535 #else
/*
 * GHASH_ASM is defined: the C implementations are compiled out and the
 * 4-bit routines are only declared here.  Their definitions have to come
 * from outside this file (presumably the platform assembler module --
 * confirm against the build).
 */
536 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
537 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
538 #endif
539
/*
 * GCM_MUL: the second macro parameter is named Xi on purpose; it replaces
 * the member name in "ctx->Xi.u", so GCM_MUL(ctx,Yi) works on ctx->Yi.u.
 */
540 #define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
/* GHASH is only provided when a streamed gcm_ghash_4bit exists (assembler or non-small-footprint C) */
541 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
542 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
543 /* GHASH_CHUNK is a "stride parameter" meant to mitigate the cache
 * thrashing effect. In other words, the idea is to hash data while it
 * is still in L1 cache after the encryption pass... */
546 #define GHASH_CHUNK       (3*1024)
547 #endif
548
549 #else   /* TABLE_BITS */
550
551 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
552 {
553         u128 V,Z = { 0,0 };
554         long X;
555         int  i,j;
556         const long *xi = (const long *)Xi;
557         const union { long one; char little; } is_endian = {1};
558
559         V.hi = H[0];    /* H is in host byte order, no byte swapping */
560         V.lo = H[1];
561
562         for (j=0; j<16/sizeof(long); ++j) {
563                 if (is_endian.little) {
564                         if (sizeof(long)==8) {
565 #ifdef BSWAP8
566                                 X = (long)(BSWAP8(xi[j]));
567 #else
568                                 const u8 *p = (const u8 *)(xi+j);
569                                 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
570 #endif
571                         }
572                         else {
573                                 const u8 *p = (const u8 *)(xi+j);
574                                 X = (long)GETU32(p);
575                         }
576                 }
577                 else
578                         X = xi[j];
579
580                 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
581                         u64 M = (u64)(X>>(8*sizeof(long)-1));
582                         Z.hi ^= V.hi&M;
583                         Z.lo ^= V.lo&M;
584
585                         REDUCE1BIT(V);
586                 }
587         }
588
589         if (is_endian.little) {
590 #ifdef BSWAP8
591                 Xi[0] = BSWAP8(Z.hi);
592                 Xi[1] = BSWAP8(Z.lo);
593 #else
594                 u8 *p = (u8 *)Xi;
595                 u32 v;
596                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
597                 v = (u32)(Z.hi);        PUTU32(p+4,v);
598                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
599                 v = (u32)(Z.lo);        PUTU32(p+12,v);
600 #endif
601         }
602         else {
603                 Xi[0] = Z.hi;
604                 Xi[1] = Z.lo;
605         }
606 }
607 #define GCM_MUL(ctx,Xi)   gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
608
609 #endif
610
/*
 * x86 / x86_64 builds with assembler GHASH and 4-bit tables (and without
 * I386_ONLY): declare the externally provided routines and route
 * GCM_MUL/GHASH through the function pointers stored in the context.
 */
611 #if     TABLE_BITS==4 && defined(GHASH_ASM) && !defined(I386_ONLY) && \
612         (defined(__i386)        || defined(__i386__)    || \
613          defined(__x86_64)      || defined(__x86_64__)  || \
614          defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
615 # define GHASH_ASM_IAX
/* capability words tested in CRYPTO_gcm128_init to choose an implementation */
616 extern unsigned int OPENSSL_ia32cap_P[2];
617
/* variants selected in CRYPTO_gcm128_init when bit 1 of OPENSSL_ia32cap_P[1] is set */
618 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
619 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
620 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
621
/* 32-bit x86 only: two further 4-bit variants (_mmx and _x86) */
622 # if    defined(__i386) || defined(__i386__) || defined(_M_IX86)
623 #  define GHASH_ASM_X86
624 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
625 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
626
627 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
628 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
629 # endif
630
/* replace the direct calls with indirect ones through ctx->gmult / ctx->ghash */
631 # undef  GCM_MUL
632 # define GCM_MUL(ctx,Xi)   (*((ctx)->gmult))(ctx->Xi.u,ctx->Htable)
633 # undef  GHASH
634 # define GHASH(ctx,in,len) (*((ctx)->ghash))((ctx)->Xi.u,(ctx)->Htable,in,len)
635 #endif
636
637 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
638 {
639         const union { long one; char little; } is_endian = {1};
640
641         memset(ctx,0,sizeof(*ctx));
642         ctx->block = block;
643         ctx->key   = key;
644
645         (*block)(ctx->H.c,ctx->H.c,key);
646
647         if (is_endian.little) {
648                 /* H is stored in host byte order */
649 #ifdef BSWAP8
650                 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
651                 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
652 #else
653                 u8 *p = ctx->H.c;
654                 u64 hi,lo;
655                 hi = (u64)GETU32(p)  <<32|GETU32(p+4);
656                 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
657                 ctx->H.u[0] = hi;
658                 ctx->H.u[1] = lo;
659 #endif
660         }
661
662 #if     TABLE_BITS==8
663         gcm_init_8bit(ctx->Htable,ctx->H.u);
664 #elif   TABLE_BITS==4
665 # if    defined(GHASH_ASM_IAX)                  /* both x86 and x86_64 */
666 #  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
667         if (OPENSSL_ia32cap_P[1]&(1<<1)) {
668                 gcm_init_clmul(ctx->Htable,ctx->H.u);
669                 ctx->gmult = gcm_gmult_clmul;
670                 ctx->ghash = gcm_ghash_clmul;
671                 return;
672         }
673 #  endif
674         gcm_init_4bit(ctx->Htable,ctx->H.u);
675 #  if   defined(GHASH_ASM_X86)                  /* x86 only */
676         if (OPENSSL_ia32cap_P[0]&(1<<23)) {
677                 ctx->gmult = gcm_gmult_4bit_mmx;
678                 ctx->ghash = gcm_ghash_4bit_mmx;
679         } else {
680                 ctx->gmult = gcm_gmult_4bit_x86;
681                 ctx->ghash = gcm_ghash_4bit_x86;
682         }
683 #  else
684         ctx->gmult = gcm_gmult_4bit;
685         ctx->ghash = gcm_ghash_4bit;
686 #  endif
687 # else
688         gcm_init_4bit(ctx->Htable,ctx->H.u);
689 # endif
690 #endif
691 }
692
693 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
694 {
695         const union { long one; char little; } is_endian = {1};
696         unsigned int ctr;
697
698         ctx->Yi.u[0]  = 0;
699         ctx->Yi.u[1]  = 0;
700         ctx->Xi.u[0]  = 0;
701         ctx->Xi.u[1]  = 0;
702         ctx->len.u[0] = 0;      /* AAD length */
703         ctx->len.u[1] = 0;      /* message length */
704         ctx->ares = 0;
705         ctx->mres = 0;
706
707         if (len==12) {
708                 memcpy(ctx->Yi.c,iv,12);
709                 ctx->Yi.c[15]=1;
710                 ctr=1;
711         }
712         else {
713                 size_t i;
714                 u64 len0 = len;
715
716                 while (len>=16) {
717                         for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
718                         GCM_MUL(ctx,Yi);
719                         iv += 16;
720                         len -= 16;
721                 }
722                 if (len) {
723                         for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
724                         GCM_MUL(ctx,Yi);
725                 }
726                 len0 <<= 3;
727                 if (is_endian.little) {
728 #ifdef BSWAP8
729                         ctx->Yi.u[1]  ^= BSWAP8(len0);
730 #else
731                         ctx->Yi.c[8]  ^= (u8)(len0>>56);
732                         ctx->Yi.c[9]  ^= (u8)(len0>>48);
733                         ctx->Yi.c[10] ^= (u8)(len0>>40);
734                         ctx->Yi.c[11] ^= (u8)(len0>>32);
735                         ctx->Yi.c[12] ^= (u8)(len0>>24);
736                         ctx->Yi.c[13] ^= (u8)(len0>>16);
737                         ctx->Yi.c[14] ^= (u8)(len0>>8);
738                         ctx->Yi.c[15] ^= (u8)(len0);
739 #endif
740                 }
741                 else
742                         ctx->Yi.u[1]  ^= len0;
743
744                 GCM_MUL(ctx,Yi);
745
746                 if (is_endian.little)
747                         ctr = GETU32(ctx->Yi.c+12);
748                 else
749                         ctr = ctx->Yi.d[3];
750         }
751
752         (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
753         ++ctr;
754         if (is_endian.little)
755                 PUTU32(ctx->Yi.c+12,ctr);
756         else
757                 ctx->Yi.d[3] = ctr;
758 }
759
760 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
761 {
762         size_t i;
763         unsigned int n;
764         u64 alen = ctx->len.u[0];
765
766         if (ctx->len.u[1]) return -2;
767
768         alen += len;
769         if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
770                 return -1;
771         ctx->len.u[0] = alen;
772
773         n = ctx->ares;
774         if (n) {
775                 while (n && len) {
776                         ctx->Xi.c[n] ^= *(aad++);
777                         --len;
778                         n = (n+1)%16;
779                 }
780                 if (n==0) GCM_MUL(ctx,Xi);
781                 else {
782                         ctx->ares = n;
783                         return 0;
784                 }
785         }
786
787 #ifdef GHASH
788         if ((i = (len&(size_t)-16))) {
789                 GHASH(ctx,aad,i);
790                 aad += i;
791                 len -= i;
792         }
793 #else
794         while (len>=16) {
795                 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
796                 GCM_MUL(ctx,Xi);
797                 aad += 16;
798                 len -= 16;
799         }
800 #endif
801         if (len) {
802                 n = (unsigned int)len;
803                 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
804         }
805
806         ctx->ares = n;
807         return 0;
808 }
809
810 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
811                 const unsigned char *in, unsigned char *out,
812                 size_t len)
813 {
814         const union { long one; char little; } is_endian = {1};
815         unsigned int n, ctr;
816         size_t i;
817         u64 mlen = ctx->len.u[1];
818
819 #if 0
820         n = (unsigned int)mlen%16; /* alternative to ctx->mres */
821 #endif
822         mlen += len;
823         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
824                 return -1;
825         ctx->len.u[1] = mlen;
826
827         if (ctx->ares) {
828                 /* First call to encrypt finalizes GHASH(AAD) */
829                 GCM_MUL(ctx,Xi);
830                 ctx->ares = 0;
831         }
832
833         if (is_endian.little)
834                 ctr = GETU32(ctx->Yi.c+12);
835         else
836                 ctr = ctx->Yi.d[3];
837
838         n = ctx->mres;
839 #if !defined(OPENSSL_SMALL_FOOTPRINT)
840         if (16%sizeof(size_t) == 0) do {        /* always true actually */
841                 if (n) {
842                         while (n && len) {
843                                 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
844                                 --len;
845                                 n = (n+1)%16;
846                         }
847                         if (n==0) GCM_MUL(ctx,Xi);
848                         else {
849                                 ctx->mres = n;
850                                 return 0;
851                         }
852                 }
853 #if defined(STRICT_ALIGNMENT)
854                 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
855                         break;
856 #endif
857 #if defined(GHASH) && defined(GHASH_CHUNK)
858                 while (len>=GHASH_CHUNK) {
859                     size_t j=GHASH_CHUNK;
860
861                     while (j) {
862                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
863                         ++ctr;
864                         if (is_endian.little)
865                                 PUTU32(ctx->Yi.c+12,ctr);
866                         else
867                                 ctx->Yi.d[3] = ctr;
868                         for (i=0; i<16; i+=sizeof(size_t))
869                                 *(size_t *)(out+i) =
870                                 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
871                         out += 16;
872                         in  += 16;
873                         j   -= 16;
874                     }
875                     GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
876                     len -= GHASH_CHUNK;
877                 }
878                 if ((i = (len&(size_t)-16))) {
879                     size_t j=i;
880
881                     while (len>=16) {
882                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
883                         ++ctr;
884                         if (is_endian.little)
885                                 PUTU32(ctx->Yi.c+12,ctr);
886                         else
887                                 ctx->Yi.d[3] = ctr;
888                         for (i=0; i<16; i+=sizeof(size_t))
889                                 *(size_t *)(out+i) =
890                                 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
891                         out += 16;
892                         in  += 16;
893                         len -= 16;
894                     }
895                     GHASH(ctx,out-j,j);
896                 }
897 #else
898                 while (len>=16) {
899                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
900                         ++ctr;
901                         if (is_endian.little)
902                                 PUTU32(ctx->Yi.c+12,ctr);
903                         else
904                                 ctx->Yi.d[3] = ctr;
905                         for (i=0; i<16; i+=sizeof(size_t))
906                                 *(size_t *)(ctx->Xi.c+i) ^=
907                                 *(size_t *)(out+i) =
908                                 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
909                         GCM_MUL(ctx,Xi);
910                         out += 16;
911                         in  += 16;
912                         len -= 16;
913                 }
914 #endif
915                 if (len) {
916                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
917                         ++ctr;
918                         if (is_endian.little)
919                                 PUTU32(ctx->Yi.c+12,ctr);
920                         else
921                                 ctx->Yi.d[3] = ctr;
922                         while (len--) {
923                                 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
924                                 ++n;
925                         }
926                 }
927
928                 ctx->mres = n;
929                 return 0;
930         } while(0);
931 #endif
932         for (i=0;i<len;++i) {
933                 if (n==0) {
934                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
935                         ++ctr;
936                         if (is_endian.little)
937                                 PUTU32(ctx->Yi.c+12,ctr);
938                         else
939                                 ctx->Yi.d[3] = ctr;
940                 }
941                 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
942                 n = (n+1)%16;
943                 if (n==0)
944                         GCM_MUL(ctx,Xi);
945         }
946
947         ctx->mres = n;
948         return 0;
949 }
950
951 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
952                 const unsigned char *in, unsigned char *out,
953                 size_t len)
954 {
955         const union { long one; char little; } is_endian = {1};
956         unsigned int n, ctr;
957         size_t i;
958         u64 mlen = ctx->len.u[1];
959
960         mlen += len;
961         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
962                 return -1;
963         ctx->len.u[1] = mlen;
964
965         if (ctx->ares) {
966                 /* First call to decrypt finalizes GHASH(AAD) */
967                 GCM_MUL(ctx,Xi);
968                 ctx->ares = 0;
969         }
970
971         if (is_endian.little)
972                 ctr = GETU32(ctx->Yi.c+12);
973         else
974                 ctr = ctx->Yi.d[3];
975
976         n = ctx->mres;
977 #if !defined(OPENSSL_SMALL_FOOTPRINT)
978         if (16%sizeof(size_t) == 0) do {        /* always true actually */
979                 if (n) {
980                         while (n && len) {
981                                 u8 c = *(in++);
982                                 *(out++) = c^ctx->EKi.c[n];
983                                 ctx->Xi.c[n] ^= c;
984                                 --len;
985                                 n = (n+1)%16;
986                         }
987                         if (n==0) GCM_MUL (ctx,Xi);
988                         else {
989                                 ctx->mres = n;
990                                 return 0;
991                         }
992                 }
993 #if defined(STRICT_ALIGNMENT)
994                 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
995                         break;
996 #endif
997 #if defined(GHASH) && defined(GHASH_CHUNK)
998                 while (len>=GHASH_CHUNK) {
999                     size_t j=GHASH_CHUNK;
1000
1001                     GHASH(ctx,in,GHASH_CHUNK);
1002                     while (j) {
1003                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1004                         ++ctr;
1005                         if (is_endian.little)
1006                                 PUTU32(ctx->Yi.c+12,ctr);
1007                         else
1008                                 ctx->Yi.d[3] = ctr;
1009                         for (i=0; i<16; i+=sizeof(size_t))
1010                                 *(size_t *)(out+i) =
1011                                 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1012                         out += 16;
1013                         in  += 16;
1014                         j   -= 16;
1015                     }
1016                     len -= GHASH_CHUNK;
1017                 }
1018                 if ((i = (len&(size_t)-16))) {
1019                     GHASH(ctx,in,i);
1020                     while (len>=16) {
1021                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1022                         ++ctr;
1023                         if (is_endian.little)
1024                                 PUTU32(ctx->Yi.c+12,ctr);
1025                         else
1026                                 ctx->Yi.d[3] = ctr;
1027                         for (i=0; i<16; i+=sizeof(size_t))
1028                                 *(size_t *)(out+i) =
1029                                 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1030                         out += 16;
1031                         in  += 16;
1032                         len -= 16;
1033                     }
1034                 }
1035 #else
1036                 while (len>=16) {
1037                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1038                         ++ctr;
1039                         if (is_endian.little)
1040                                 PUTU32(ctx->Yi.c+12,ctr);
1041                         else
1042                                 ctx->Yi.d[3] = ctr;
1043                         for (i=0; i<16; i+=sizeof(size_t)) {
1044                                 size_t c = *(size_t *)(in+i);
1045                                 *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
1046                                 *(size_t *)(ctx->Xi.c+i) ^= c;
1047                         }
1048                         GCM_MUL(ctx,Xi);
1049                         out += 16;
1050                         in  += 16;
1051                         len -= 16;
1052                 }
1053 #endif
1054                 if (len) {
1055                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1056                         ++ctr;
1057                         if (is_endian.little)
1058                                 PUTU32(ctx->Yi.c+12,ctr);
1059                         else
1060                                 ctx->Yi.d[3] = ctr;
1061                         while (len--) {
1062                                 u8 c = in[n];
1063                                 ctx->Xi.c[n] ^= c;
1064                                 out[n] = c^ctx->EKi.c[n];
1065                                 ++n;
1066                         }
1067                 }
1068
1069                 ctx->mres = n;
1070                 return 0;
1071         } while(0);
1072 #endif
1073         for (i=0;i<len;++i) {
1074                 u8 c;
1075                 if (n==0) {
1076                         (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1077                         ++ctr;
1078                         if (is_endian.little)
1079                                 PUTU32(ctx->Yi.c+12,ctr);
1080                         else
1081                                 ctx->Yi.d[3] = ctr;
1082                 }
1083                 c = in[i];
1084                 out[i] = c^ctx->EKi.c[n];
1085                 ctx->Xi.c[n] ^= c;
1086                 n = (n+1)%16;
1087                 if (n==0)
1088                         GCM_MUL(ctx,Xi);
1089         }
1090
1091         ctx->mres = n;
1092         return 0;
1093 }
1094
1095 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1096                 const unsigned char *in, unsigned char *out,
1097                 size_t len, ctr128_f stream)
1098 {
1099         const union { long one; char little; } is_endian = {1};
1100         unsigned int n, ctr;
1101         size_t i;
1102         u64 mlen = ctx->len.u[1];
1103
1104         mlen += len;
1105         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1106                 return -1;
1107         ctx->len.u[1] = mlen;
1108
1109         if (ctx->ares) {
1110                 /* First call to encrypt finalizes GHASH(AAD) */
1111                 GCM_MUL(ctx,Xi);
1112                 ctx->ares = 0;
1113         }
1114
1115         if (is_endian.little)
1116                 ctr = GETU32(ctx->Yi.c+12);
1117         else
1118                 ctr = ctx->Yi.d[3];
1119
1120         n = ctx->mres;
1121         if (n) {
1122                 while (n && len) {
1123                         ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1124                         --len;
1125                         n = (n+1)%16;
1126                 }
1127                 if (n==0) GCM_MUL(ctx,Xi);
1128                 else {
1129                         ctx->mres = n;
1130                         return 0;
1131                 }
1132         }
1133 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1134         while (len>=GHASH_CHUNK) {
1135                 (*stream)(in,out,GHASH_CHUNK/16,ctx->key,ctx->Yi.c);
1136                 ctr += GHASH_CHUNK/16;
1137                 if (is_endian.little)
1138                         PUTU32(ctx->Yi.c+12,ctr);
1139                 else
1140                         ctx->Yi.d[3] = ctr;
1141                 GHASH(ctx,out,GHASH_CHUNK);
1142                 out += GHASH_CHUNK;
1143                 in  += GHASH_CHUNK;
1144                 len -= GHASH_CHUNK;
1145         }
1146 #endif
1147         if ((i = (len&(size_t)-16))) {
1148                 size_t j=i/16;
1149
1150                 (*stream)(in,out,j,ctx->key,ctx->Yi.c);
1151                 ctr += (unsigned int)j;
1152                 if (is_endian.little)
1153                         PUTU32(ctx->Yi.c+12,ctr);
1154                 else
1155                         ctx->Yi.d[3] = ctr;
1156                 in  += i;
1157                 len -= i;
1158 #if defined(GHASH)
1159                 GHASH(ctx,out,i);
1160                 out += i;
1161 #else
1162                 while (j--) {
1163                         for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1164                         GCM_MUL(ctx,Xi);
1165                         out += 16;
1166                 }
1167 #endif
1168         }
1169         if (len) {
1170                 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1171                 ++ctr;
1172                 if (is_endian.little)
1173                         PUTU32(ctx->Yi.c+12,ctr);
1174                 else
1175                         ctx->Yi.d[3] = ctr;
1176                 while (len--) {
1177                         ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1178                         ++n;
1179                 }
1180         }
1181
1182         ctx->mres = n;
1183         return 0;
1184 }
1185
1186 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1187                 const unsigned char *in, unsigned char *out,
1188                 size_t len,ctr128_f stream)
1189 {
1190         const union { long one; char little; } is_endian = {1};
1191         unsigned int n, ctr;
1192         size_t i;
1193         u64 mlen = ctx->len.u[1];
1194
1195         mlen += len;
1196         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1197                 return -1;
1198         ctx->len.u[1] = mlen;
1199
1200         if (ctx->ares) {
1201                 /* First call to decrypt finalizes GHASH(AAD) */
1202                 GCM_MUL(ctx,Xi);
1203                 ctx->ares = 0;
1204         }
1205
1206         if (is_endian.little)
1207                 ctr = GETU32(ctx->Yi.c+12);
1208         else
1209                 ctr = ctx->Yi.d[3];
1210
1211         n = ctx->mres;
1212         if (n) {
1213                 while (n && len) {
1214                         u8 c = *(in++);
1215                         *(out++) = c^ctx->EKi.c[n];
1216                         ctx->Xi.c[n] ^= c;
1217                         --len;
1218                         n = (n+1)%16;
1219                 }
1220                 if (n==0) GCM_MUL (ctx,Xi);
1221                 else {
1222                         ctx->mres = n;
1223                         return 0;
1224                 }
1225         }
1226 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1227         while (len>=GHASH_CHUNK) {
1228                 GHASH(ctx,in,GHASH_CHUNK);
1229                 (*stream)(in,out,GHASH_CHUNK/16,ctx->key,ctx->Yi.c);
1230                 ctr += GHASH_CHUNK/16;
1231                 if (is_endian.little)
1232                         PUTU32(ctx->Yi.c+12,ctr);
1233                 else
1234                         ctx->Yi.d[3] = ctr;
1235                 out += GHASH_CHUNK;
1236                 in  += GHASH_CHUNK;
1237                 len -= GHASH_CHUNK;
1238         }
1239 #endif
1240         if ((i = (len&(size_t)-16))) {
1241                 size_t j=i/16;
1242
1243 #if defined(GHASH)
1244                 GHASH(ctx,in,i);
1245 #else
1246                 while (j--) {
1247                         size_t k;
1248                         for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1249                         GCM_MUL(ctx,Xi);
1250                         in += 16;
1251                 }
1252                 j   = i/16;
1253                 in -= i;
1254 #endif
1255                 (*stream)(in,out,j,ctx->key,ctx->Yi.c);
1256                 ctr += (unsigned int)j;
1257                 if (is_endian.little)
1258                         PUTU32(ctx->Yi.c+12,ctr);
1259                 else
1260                         ctx->Yi.d[3] = ctr;
1261                 out += i;
1262                 in  += i;
1263                 len -= i;
1264         }
1265         if (len) {
1266                 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1267                 ++ctr;
1268                 if (is_endian.little)
1269                         PUTU32(ctx->Yi.c+12,ctr);
1270                 else
1271                         ctx->Yi.d[3] = ctr;
1272                 while (len--) {
1273                         u8 c = in[n];
1274                         ctx->Xi.c[n] ^= c;
1275                         out[n] = c^ctx->EKi.c[n];
1276                         ++n;
1277                 }
1278         }
1279
1280         ctx->mres = n;
1281         return 0;
1282 }
1283
1284 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1285                         size_t len)
1286 {
1287         const union { long one; char little; } is_endian = {1};
1288         u64 alen = ctx->len.u[0]<<3;
1289         u64 clen = ctx->len.u[1]<<3;
1290
1291         if (ctx->mres)
1292                 GCM_MUL(ctx,Xi);
1293
1294         if (is_endian.little) {
1295 #ifdef BSWAP8
1296                 alen = BSWAP8(alen);
1297                 clen = BSWAP8(clen);
1298 #else
1299                 u8 *p = ctx->len.c;
1300
1301                 ctx->len.u[0] = alen;
1302                 ctx->len.u[1] = clen;
1303
1304                 alen = (u64)GETU32(p)  <<32|GETU32(p+4);
1305                 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1306 #endif
1307         }
1308
1309         ctx->Xi.u[0] ^= alen;
1310         ctx->Xi.u[1] ^= clen;
1311         GCM_MUL(ctx,Xi);
1312
1313         ctx->Xi.u[0] ^= ctx->EK0.u[0];
1314         ctx->Xi.u[1] ^= ctx->EK0.u[1];
1315
1316         if (tag && len<=sizeof(ctx->Xi))
1317                 return memcmp(ctx->Xi.c,tag,len);
1318         else
1319                 return -1;
1320 }
1321
1322 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1323 {
1324         CRYPTO_gcm128_finish(ctx, NULL, 0);
1325         memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
1326 }
1327
1328 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1329 {
1330         GCM128_CONTEXT *ret;
1331
1332         if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1333                 CRYPTO_gcm128_init(ret,key,block);
1334
1335         return ret;
1336 }
1337
1338 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1339 {
1340         if (ctx) {
1341                 OPENSSL_cleanse(ctx,sizeof(*ctx));
1342                 OPENSSL_free(ctx);
1343         }
1344 }
1345
1346 #if defined(SELFTEST)
1347 #include <stdio.h>
1348 #include <openssl/aes.h>
1349
1350 /* Test Case 1: all-zero 16-byte key, all-zero 12-byte IV, no AAD, empty plaintext (K1/IV1 are zero-initialized statics); T1 is the expected 16-byte tag */
1351 static const u8 K1[16],
1352                 *P1=NULL,
1353                 *A1=NULL,
1354                 IV1[12],
1355                 *C1=NULL,
1356                 T1[]=  {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
1357
1358 /* Test Case 2: key, AAD and IV of case 1 (aliased via macros), 16 zero bytes of plaintext; C2/T2 are the expected ciphertext and tag */
1359 #define K2 K1
1360 #define A2 A1
1361 #define IV2 IV1
1362 static const u8 P2[16],
1363                 C2[]=  {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1364                 T2[]=  {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1365
1366 /* Test Case 3: 16-byte key, 64-byte plaintext, 12-byte IV, no AAD (A3 resolves to A1, i.e. NULL) */
1367 #define A3 A2
1368 static const u8 K3[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1369                 P3[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1370                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1371                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1372                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1373                 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1374                 C3[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1375                         0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1376                         0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1377                         0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1378                 T3[]=  {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
1379
1380 /* Test Case 4: key and IV of case 3, 60-byte plaintext (not a multiple of the block size), 20-byte AAD */
1381 #define K4 K3
1382 #define IV4 IV3
1383 static const u8 P4[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1384                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1385                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1386                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1387                 A4[]=  {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1388                         0xab,0xad,0xda,0xd2},
1389                 C4[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1390                         0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1391                         0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1392                         0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1393                 T4[]=  {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1394
1395 /* Test Case 5: key and plaintext of case 4, 20-byte AAD, 8-byte IV (shorter than 12 bytes) */
1396 #define K5 K4
1397 #define P5 P4
1398 static const u8 A5[]=  {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1399                         0xab,0xad,0xda,0xd2},
1400                 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1401                 C5[]=  {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1402                         0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1403                         0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1404                         0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1405                 T5[]=  {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
1406
1407 /* Test Case 6: key, plaintext and AAD of case 5, 60-byte IV (longer than 12 bytes) */
1408 #define K6 K5
1409 #define P6 P5
1410 #define A6 A5
1411 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1412                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1413                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1414                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1415                 C6[]=  {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1416                         0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1417                         0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1418                         0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1419                 T6[]=  {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1420
1421 /* Test Case 7: all-zero 24-byte key, all-zero 12-byte IV, no AAD, empty plaintext; T7 is the expected 16-byte tag */
1422 static const u8 K7[24],
1423                 *P7=NULL,
1424                 *A7=NULL,
1425                 IV7[12],
1426                 *C7=NULL,
1427                 T7[]=  {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1428
1429 /* Test Case 8: key, IV and AAD of case 7 (aliased via macros), 16 zero bytes of plaintext */
1430 #define K8 K7
1431 #define IV8 IV7
1432 #define A8 A7
1433 static const u8 P8[16],
1434                 C8[]=  {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1435                 T8[]=  {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1436
1437 /* Test Case 9: 24-byte key, 64-byte plaintext, 12-byte IV, no AAD (A9 resolves to A7, i.e. NULL) */
1438 #define A9 A8
1439 static const u8 K9[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1440                         0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1441                 P9[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1442                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1443                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1444                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1445                 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1446                 C9[]=  {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1447                         0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1448                         0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1449                         0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1450                 T9[]=  {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1451
1452 /* Test Case 10: key and IV of case 9, 60-byte plaintext (not a multiple of the block size), 20-byte AAD */
1453 #define K10 K9
1454 #define IV10 IV9
1455 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1456                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1457                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1458                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1459                 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1460                         0xab,0xad,0xda,0xd2},
1461                 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1462                         0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1463                         0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1464                         0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1465                 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1466
1467 /* Test Case 11: key, plaintext and AAD of case 10, 8-byte IV (shorter than 12 bytes) */
1468 #define K11 K10
1469 #define P11 P10
1470 #define A11 A10
1471 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1472                 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1473                         0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1474                         0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1475                         0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1476                 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1477
1478 /* Test Case 12: key, plaintext and AAD of case 11, 60-byte IV (longer than 12 bytes) */
1479 #define K12 K11
1480 #define P12 P11
1481 #define A12 A11
1482 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1483                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1484                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1485                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1486                 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1487                         0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1488                         0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1489                         0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1490                 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1491
1492 /* Test Case 13: all-zero 32-byte key, all-zero 12-byte IV, no AAD, empty plaintext; T13 is the expected 16-byte tag */
1493 static const u8 K13[32],
1494                 *P13=NULL,
1495                 *A13=NULL,
1496                 IV13[12],
1497                 *C13=NULL,
1498                 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1499
1500 /* Test Case 14: key and AAD of case 13 (aliased via macros), 16 zero bytes of plaintext, its own all-zero 12-byte IV */
1501 #define K14 K13
1502 #define A14 A13
1503 static const u8 P14[16],
1504                 IV14[12],
1505                 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1506                 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1507
1508 /* Test Case 15: 32-byte key, 64-byte plaintext, 12-byte IV, no AAD (A15 resolves to A13, i.e. NULL) */
1509 #define A15 A14
1510 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1511                         0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1512                 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1513                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1514                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1515                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1516                 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1517                 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1518                         0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1519                         0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1520                         0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1521                 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1522
1523 /* Test Case 16: key and IV of case 15, 60-byte plaintext (not a multiple of the block size), 20-byte AAD */
1524 #define K16 K15
1525 #define IV16 IV15
1526 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1527                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1528                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1529                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1530                 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1531                         0xab,0xad,0xda,0xd2},
1532                 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1533                         0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1534                         0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1535                         0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1536                 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1537
1538 /* Test Case 17 */
1539 #define K17 K16
1540 #define P17 P16
1541 #define A17 A16
1542 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1543                 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1544                         0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1545                         0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1546                         0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1547                 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1548
1549 /* Test Case 18 */
1550 #define K18 K17
1551 #define P18 P17
1552 #define A18 A17
1553 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1554                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1555                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1556                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1557                 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1558                         0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1559                         0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1560                         0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1561                 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1562
/*
 * TEST_CASE(n) runs vector n (K<n>, IV<n>, A<n>, P<n>, C<n>, T<n>) in
 * both directions, using the ctx, key and ret variables of the
 * enclosing scope.
 *
 * The key schedule is set up with sizeof(K<n>)*8 bits and the GCM
 * context initialised once.  Each direction then sets the IV, zeroes a
 * scratch buffer of sizeof(P<n>) bytes, feeds A<n> as AAD when it is
 * non-null, and processes P<n> (encrypt) or C<n> (decrypt) when that
 * input is non-null.  A direction fails when CRYPTO_gcm128_finish()
 * returns non-zero for the 16-byte tag T<n>, or when the scratch buffer
 * differs from the expected C<n> / P<n> (compared only if non-null).
 * Every failure increments ret and prints a one-line message.
 */
#define TEST_CASE(n)    do {                                            \
        u8 res[sizeof(P##n)];                                           \
                                                                        \
        AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);                  \
        CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);          \
                                                                        \
        CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));                  \
        memset(res,0,sizeof(res));                                      \
        if (A##n)                                                       \
                CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));              \
        if (P##n)                                                       \
                CRYPTO_gcm128_encrypt(&ctx,P##n,res,sizeof(res));       \
        if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||                       \
            (C##n && memcmp(res,C##n,sizeof(res)))) {                   \
                ret++;                                                  \
                printf ("encrypt test#%d failed.\n",n);                 \
        }                                                               \
                                                                        \
        CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));                  \
        memset(res,0,sizeof(res));                                      \
        if (A##n)                                                       \
                CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));              \
        if (C##n)                                                       \
                CRYPTO_gcm128_decrypt(&ctx,C##n,res,sizeof(res));       \
        if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||                       \
            (P##n && memcmp(res,P##n,sizeof(res)))) {                   \
                ret++;                                                  \
                printf ("decrypt test#%d failed.\n",n);                 \
        }                                                               \
        } while(0)
1582
1583 int main()
1584 {
1585         GCM128_CONTEXT ctx;
1586         AES_KEY key;
1587         int ret=0;
1588
1589         TEST_CASE(1);
1590         TEST_CASE(2);
1591         TEST_CASE(3);
1592         TEST_CASE(4);
1593         TEST_CASE(5);
1594         TEST_CASE(6);
1595         TEST_CASE(7);
1596         TEST_CASE(8);
1597         TEST_CASE(9);
1598         TEST_CASE(10);
1599         TEST_CASE(11);
1600         TEST_CASE(12);
1601         TEST_CASE(13);
1602         TEST_CASE(14);
1603         TEST_CASE(15);
1604         TEST_CASE(16);
1605         TEST_CASE(17);
1606         TEST_CASE(18);
1607
1608 #ifdef OPENSSL_CPUID_OBJ
1609         {
1610         size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1611         union { u64 u; u8 c[1024]; } buf;
1612         int i;
1613
1614         AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1615         CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1616         CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1617
1618         CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1619         start = OPENSSL_rdtsc();
1620         CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1621         gcm_t = OPENSSL_rdtsc() - start;
1622
1623         CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1624                         &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1625                         (block128_f)AES_encrypt);
1626         start = OPENSSL_rdtsc();
1627         CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1628                         &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1629                         (block128_f)AES_encrypt);
1630         ctr_t = OPENSSL_rdtsc() - start;
1631
1632         printf("%.2f-%.2f=%.2f\n",
1633                         gcm_t/(double)sizeof(buf),
1634                         ctr_t/(double)sizeof(buf),
1635                         (gcm_t-ctr_t)/(double)sizeof(buf));
1636 #ifdef GHASH
1637         GHASH(&ctx,buf.c,sizeof(buf));
1638         start = OPENSSL_rdtsc();
1639         for (i=0;i<100;++i) GHASH(&ctx,buf.c,sizeof(buf));
1640         gcm_t = OPENSSL_rdtsc() - start;
1641         printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
1642 #endif
1643         }
1644 #endif
1645
1646         return ret;
1647 }
1648 #endif