[openssl.git] / crypto / modes / gcm128.c
1 /* ====================================================================
2  * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer. 
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  *
16  * 3. All advertising materials mentioning features or use of this
17  *    software must display the following acknowledgment:
18  *    "This product includes software developed by the OpenSSL Project
19  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20  *
21  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22  *    endorse or promote products derived from this software without
23  *    prior written permission. For written permission, please contact
24  *    openssl-core@openssl.org.
25  *
26  * 5. Products derived from this software may not be called "OpenSSL"
27  *    nor may "OpenSSL" appear in their names without prior written
28  *    permission of the OpenSSL Project.
29  *
30  * 6. Redistributions of any form whatsoever must retain the following
31  *    acknowledgment:
32  *    "This product includes software developed by the OpenSSL Project
33  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34  *
35  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
39  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46  * OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  */
49
50 #define OPENSSL_FIPSAPI
51
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
54 #include <string.h>
55
56 #ifndef MODES_DEBUG
57 # ifndef NDEBUG
58 #  define NDEBUG
59 # endif
60 #endif
61 #include <assert.h>
62
63 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64 /* redefine, because alignment is ensured */
65 #undef  GETU32
66 #define GETU32(p)       BSWAP4(*(const u32 *)(p))
67 #undef  PUTU32
68 #define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
69 #endif
70
71 #define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
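/*
 * PACK() positions a 16-bit constant in the most significant 16 bits of a
 * size_t, which is where the rem_4bit/rem_8bit reduction tables below need
 * it. REDUCE1BIT() performs one "shift and reduce" step of the GF(2^128)
 * multiplication in GCM's reflected bit order: a one-bit right shift of the
 * 128-bit value V, with the bit shifted out folded back in via the reduction
 * constant 0xE1 followed by zeros.
 */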
72 #define REDUCE1BIT(V)   do { \
73         if (sizeof(size_t)==8) { \
74                 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75                 V.lo  = (V.hi<<63)|(V.lo>>1); \
76                 V.hi  = (V.hi>>1 )^T; \
77         } \
78         else { \
79                 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80                 V.lo  = (V.hi<<63)|(V.lo>>1); \
81                 V.hi  = (V.hi>>1 )^((u64)T<<32); \
82         } \
83 } while(0)
84
85 /*
86  * Even though the permitted values for TABLE_BITS are 8, 4 and 1, it
87  * should never be set to 8; 8 is effectively reserved for testing.
88  * TABLE_BITS>1 selects the lookup-table-driven implementations referred
89  * to as "Shoup's" in the GCM specification; in other words, OpenSSL does
90  * not cover the whole spectrum of possible table-driven implementations.
91  * Why? In the non-"Shoup's" case the memory access pattern is segmented
92  * in such a manner that cache-timing information can reveal a fair
93  * portion of the intermediate hash value. Given that the ciphertext is
94  * always available to the attacker, this allows an attempt to deduce the
95  * secret parameter H and, if successful, to tamper with messages [which
96  * is trivial in CTR mode]. In the "Shoup's" case it's not as easy, but
97  * there is no reason to believe it's resistant to cache-timing attacks.
98  * The drawback of the "8-bit" implementation is that it consumes 16
99  * (sixteen) times more memory, 4KB per individual key + 1KB shared. On
100  * the pro side it should be about twice as fast as the "4-bit" version:
101  * for gcc-generated x86[_64] code the "8-bit" version was observed to
102  * run ~75% faster, closer to 100% with commercial compilers... Yet the
103  * "4-bit" procedure is preferred, because it's believed to provide a
104  * better security-performance balance and adequate all-round
105  * performance. "All-round" refers to things like:
106  *
107  * - a shorter setup time effectively improves the overall timing for
108  *   handling short messages;
109  * - a larger table allocation can become unbearable because of VM
110  *   subsystem penalties (for example, on Windows a large enough free()
111  *   results in VM working-set trimming, meaning that a subsequent
112  *   malloc() would immediately incur working-set expansion);
113  * - a larger table has a larger cache footprint, which can affect the
114  *   performance of other code paths (not necessarily even in the same
115  *   thread in a Hyper-Threading world);
116  *
117  * A value of 1 is not appropriate for performance reasons.
118  */
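/*
 * For a concrete sense of the per-key footprint mentioned above, each u128
 * Htable entry is two 64-bit halves, i.e. 16 bytes (a standalone sketch,
 * not referenced by the code below):
 */
#if 0
#include <stdio.h>

int main(void)
{
        size_t entry = 16;                               /* sizeof(u128) */

        printf("4-bit Htable: %zu bytes per key\n", (size_t)16*entry);  /* 256 */
        printf("8-bit Htable: %zu bytes per key\n", (size_t)256*entry); /* 4096 */
        return 0;
}
#endif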
119 #if     TABLE_BITS==8
120
121 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
122 {
123         int  i, j;
124         u128 V;
125
126         Htable[0].hi = 0;
127         Htable[0].lo = 0;
128         V.hi = H[0];
129         V.lo = H[1];
130
131         for (Htable[128]=V, i=64; i>0; i>>=1) {
132                 REDUCE1BIT(V);
133                 Htable[i] = V;
134         }
135
136         for (i=2; i<256; i<<=1) {
137                 u128 *Hi = Htable+i, H0 = *Hi;
138                 for (j=1; j<i; ++j) {
139                         Hi[j].hi = H0.hi^Htable[j].hi;
140                         Hi[j].lo = H0.lo^Htable[j].lo;
141                 }
142         }
143 }
144
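/*
 * gcm_gmult_8bit computes Xi = Xi * H over GF(2^128), one byte of Xi at a
 * time from the last byte to the first: each byte selects one of the 256
 * precomputed multiples of H in Htable, and rem_8bit folds the eight bits
 * shifted out of the accumulator back in through the reduction polynomial.
 */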
145 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
146 {
147         u128 Z = { 0, 0};
148         const u8 *xi = (const u8 *)Xi+15;
149         size_t rem, n = *xi;
150         const union { long one; char little; } is_endian = {1};
151         __fips_constseg
152         static const size_t rem_8bit[256] = {
153                 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
154                 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
155                 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
156                 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
157                 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
158                 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
159                 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
160                 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
161                 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
162                 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
163                 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
164                 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
165                 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
166                 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
167                 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
168                 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
169                 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
170                 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
171                 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
172                 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
173                 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
174                 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
175                 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
176                 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
177                 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
178                 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
179                 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
180                 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
181                 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
182                 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
183                 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
184                 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
185                 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
186                 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
187                 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
188                 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
189                 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
190                 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
191                 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
192                 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
193                 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
194                 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
195                 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
196                 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
197                 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
198                 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
199                 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
200                 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
201                 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
202                 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
203                 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
204                 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
205                 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
206                 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
207                 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
208                 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
209                 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
210                 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
211                 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
212                 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
213                 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
214                 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
215                 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
216                 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
217
218         while (1) {
219                 Z.hi ^= Htable[n].hi;
220                 Z.lo ^= Htable[n].lo;
221
222                 if ((u8 *)Xi==xi)       break;
223
224                 n = *(--xi);
225
226                 rem  = (size_t)Z.lo&0xff;
227                 Z.lo = (Z.hi<<56)|(Z.lo>>8);
228                 Z.hi = (Z.hi>>8);
229                 if (sizeof(size_t)==8)
230                         Z.hi ^= rem_8bit[rem];
231                 else
232                         Z.hi ^= (u64)rem_8bit[rem]<<32;
233         }
234
235         if (is_endian.little) {
236 #ifdef BSWAP8
237                 Xi[0] = BSWAP8(Z.hi);
238                 Xi[1] = BSWAP8(Z.lo);
239 #else
240                 u8 *p = (u8 *)Xi;
241                 u32 v;
242                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
243                 v = (u32)(Z.hi);        PUTU32(p+4,v);
244                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
245                 v = (u32)(Z.lo);        PUTU32(p+12,v);
246 #endif
247         }
248         else {
249                 Xi[0] = Z.hi;
250                 Xi[1] = Z.lo;
251         }
252 }
253 #define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
254
255 #elif   TABLE_BITS==4
256
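/*
 * gcm_init_4bit fills Htable with the 16 multiples of H needed for
 * nibble-at-a-time GHASH: the power-of-two entries are derived from H by
 * repeated REDUCE1BIT() steps, and the remaining entries are XOR
 * combinations of those.
 */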
257 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
258 {
259         u128 V;
260 #if defined(OPENSSL_SMALL_FOOTPRINT)
261         int  i;
262 #endif
263
264         Htable[0].hi = 0;
265         Htable[0].lo = 0;
266         V.hi = H[0];
267         V.lo = H[1];
268
269 #if defined(OPENSSL_SMALL_FOOTPRINT)
270         for (Htable[8]=V, i=4; i>0; i>>=1) {
271                 REDUCE1BIT(V);
272                 Htable[i] = V;
273         }
274
275         for (i=2; i<16; i<<=1) {
276                 u128 *Hi = Htable+i;
277                 int   j;
278                 for (V=*Hi, j=1; j<i; ++j) {
279                         Hi[j].hi = V.hi^Htable[j].hi;
280                         Hi[j].lo = V.lo^Htable[j].lo;
281                 }
282         }
283 #else
284         Htable[8] = V;
285         REDUCE1BIT(V);
286         Htable[4] = V;
287         REDUCE1BIT(V);
288         Htable[2] = V;
289         REDUCE1BIT(V);
290         Htable[1] = V;
291         Htable[3].hi  = V.hi^Htable[2].hi, Htable[3].lo  = V.lo^Htable[2].lo;
292         V=Htable[4];
293         Htable[5].hi  = V.hi^Htable[1].hi, Htable[5].lo  = V.lo^Htable[1].lo;
294         Htable[6].hi  = V.hi^Htable[2].hi, Htable[6].lo  = V.lo^Htable[2].lo;
295         Htable[7].hi  = V.hi^Htable[3].hi, Htable[7].lo  = V.lo^Htable[3].lo;
296         V=Htable[8];
297         Htable[9].hi  = V.hi^Htable[1].hi, Htable[9].lo  = V.lo^Htable[1].lo;
298         Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
299         Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
300         Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
301         Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
302         Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
303         Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
304 #endif
305 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
306         /*
307          * The ARM assembler code expects a specific dword order in Htable.
308          */
309         {
310         int j;
311         const union { long one; char little; } is_endian = {1};
312
313         if (is_endian.little)
314                 for (j=0;j<16;++j) {
315                         V = Htable[j];
316                         Htable[j].hi = V.lo;
317                         Htable[j].lo = V.hi;
318                 }
319         else
320                 for (j=0;j<16;++j) {
321                         V = Htable[j];
322                         Htable[j].hi = V.lo<<32|V.lo>>32;
323                         Htable[j].lo = V.hi<<32|V.hi>>32;
324                 }
325         }
326 #endif
327 }
328
329 #ifndef GHASH_ASM
330 __fips_constseg
331 static const size_t rem_4bit[16] = {
332         PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
333         PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
334         PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
335         PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
336
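/*
 * gcm_gmult_4bit computes Xi = Xi * H four bits at a time, starting with the
 * low nibble of the last byte of Xi. Each nibble indexes Htable, and after
 * every 4-bit shift of the accumulator the bits that drop off the low end
 * are folded back in through rem_4bit (the PACK()-ed constants above).
 */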
337 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
338 {
339         u128 Z;
340         int cnt = 15;
341         size_t rem, nlo, nhi;
342         const union { long one; char little; } is_endian = {1};
343
344         nlo  = ((const u8 *)Xi)[15];
345         nhi  = nlo>>4;
346         nlo &= 0xf;
347
348         Z.hi = Htable[nlo].hi;
349         Z.lo = Htable[nlo].lo;
350
351         while (1) {
352                 rem  = (size_t)Z.lo&0xf;
353                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
354                 Z.hi = (Z.hi>>4);
355                 if (sizeof(size_t)==8)
356                         Z.hi ^= rem_4bit[rem];
357                 else
358                         Z.hi ^= (u64)rem_4bit[rem]<<32;
359
360                 Z.hi ^= Htable[nhi].hi;
361                 Z.lo ^= Htable[nhi].lo;
362
363                 if (--cnt<0)            break;
364
365                 nlo  = ((const u8 *)Xi)[cnt];
366                 nhi  = nlo>>4;
367                 nlo &= 0xf;
368
369                 rem  = (size_t)Z.lo&0xf;
370                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
371                 Z.hi = (Z.hi>>4);
372                 if (sizeof(size_t)==8)
373                         Z.hi ^= rem_4bit[rem];
374                 else
375                         Z.hi ^= (u64)rem_4bit[rem]<<32;
376
377                 Z.hi ^= Htable[nlo].hi;
378                 Z.lo ^= Htable[nlo].lo;
379         }
380
381         if (is_endian.little) {
382 #ifdef BSWAP8
383                 Xi[0] = BSWAP8(Z.hi);
384                 Xi[1] = BSWAP8(Z.lo);
385 #else
386                 u8 *p = (u8 *)Xi;
387                 u32 v;
388                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
389                 v = (u32)(Z.hi);        PUTU32(p+4,v);
390                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
391                 v = (u32)(Z.lo);        PUTU32(p+12,v);
392 #endif
393         }
394         else {
395                 Xi[0] = Z.hi;
396                 Xi[1] = Z.lo;
397         }
398 }
399
400 #if !defined(OPENSSL_SMALL_FOOTPRINT)
401 /*
402  * Streamed version of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for
403  * details... Compiler-generated code doesn't seem to give any
404  * performance improvement, at least not on x86[_64]. It's here
405  * mostly as a reference and a placeholder for possible future
406  * non-trivial optimization[s]...
407  */
408 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
409                                 const u8 *inp,size_t len)
410 {
411     u128 Z;
412     int cnt;
413     size_t rem, nlo, nhi;
414     const union { long one; char little; } is_endian = {1};
415
416 #if 1
417     do {
418         cnt  = 15;
419         nlo  = ((const u8 *)Xi)[15];
420         nlo ^= inp[15];
421         nhi  = nlo>>4;
422         nlo &= 0xf;
423
424         Z.hi = Htable[nlo].hi;
425         Z.lo = Htable[nlo].lo;
426
427         while (1) {
428                 rem  = (size_t)Z.lo&0xf;
429                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
430                 Z.hi = (Z.hi>>4);
431                 if (sizeof(size_t)==8)
432                         Z.hi ^= rem_4bit[rem];
433                 else
434                         Z.hi ^= (u64)rem_4bit[rem]<<32;
435
436                 Z.hi ^= Htable[nhi].hi;
437                 Z.lo ^= Htable[nhi].lo;
438
439                 if (--cnt<0)            break;
440
441                 nlo  = ((const u8 *)Xi)[cnt];
442                 nlo ^= inp[cnt];
443                 nhi  = nlo>>4;
444                 nlo &= 0xf;
445
446                 rem  = (size_t)Z.lo&0xf;
447                 Z.lo = (Z.hi<<60)|(Z.lo>>4);
448                 Z.hi = (Z.hi>>4);
449                 if (sizeof(size_t)==8)
450                         Z.hi ^= rem_4bit[rem];
451                 else
452                         Z.hi ^= (u64)rem_4bit[rem]<<32;
453
454                 Z.hi ^= Htable[nlo].hi;
455                 Z.lo ^= Htable[nlo].lo;
456         }
457 #else
458     /*
459      * An extra 256+16 bytes per key plus 512 bytes of shared tables
460      * [should] give ~50% improvement... One could have PACK()-ed
461      * rem_8bit even here, but the priority is to minimize the
462      * cache footprint...
463      */
464     u128 Hshr4[16];     /* Htable shifted right by 4 bits */
465     u8   Hshl4[16];     /* Htable shifted left  by 4 bits */
466     __fips_constseg
467     static const unsigned short rem_8bit[256] = {
468         0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
469         0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
470         0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
471         0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
472         0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
473         0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
474         0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
475         0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
476         0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
477         0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
478         0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
479         0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
480         0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
481         0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
482         0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
483         0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
484         0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
485         0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
486         0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
487         0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
488         0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
489         0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
490         0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
491         0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
492         0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
493         0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
494         0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
495         0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
496         0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
497         0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
498         0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
499         0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
500     /*
501      * This pre-processing phase slows the procedure down by roughly as
502      * much as it makes each loop iteration faster. In other words,
503      * single-block performance is about the same as with the straight
504      * "4-bit" implementation, and beyond that it only gets faster...
505      */
506     for (cnt=0; cnt<16; ++cnt) {
507         Z.hi = Htable[cnt].hi;
508         Z.lo = Htable[cnt].lo;
509         Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
510         Hshr4[cnt].hi = (Z.hi>>4);
511         Hshl4[cnt]    = (u8)(Z.lo<<4);
512     }
513
514     do {
515         for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
516                 nlo  = ((const u8 *)Xi)[cnt];
517                 nlo ^= inp[cnt];
518                 nhi  = nlo>>4;
519                 nlo &= 0xf;
520
521                 Z.hi ^= Htable[nlo].hi;
522                 Z.lo ^= Htable[nlo].lo;
523
524                 rem = (size_t)Z.lo&0xff;
525
526                 Z.lo = (Z.hi<<56)|(Z.lo>>8);
527                 Z.hi = (Z.hi>>8);
528
529                 Z.hi ^= Hshr4[nhi].hi;
530                 Z.lo ^= Hshr4[nhi].lo;
531                 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
532         }
533
534         nlo  = ((const u8 *)Xi)[0];
535         nlo ^= inp[0];
536         nhi  = nlo>>4;
537         nlo &= 0xf;
538
539         Z.hi ^= Htable[nlo].hi;
540         Z.lo ^= Htable[nlo].lo;
541
542         rem = (size_t)Z.lo&0xf;
543
544         Z.lo = (Z.hi<<60)|(Z.lo>>4);
545         Z.hi = (Z.hi>>4);
546
547         Z.hi ^= Htable[nhi].hi;
548         Z.lo ^= Htable[nhi].lo;
549         Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
550 #endif
551
552         if (is_endian.little) {
553 #ifdef BSWAP8
554                 Xi[0] = BSWAP8(Z.hi);
555                 Xi[1] = BSWAP8(Z.lo);
556 #else
557                 u8 *p = (u8 *)Xi;
558                 u32 v;
559                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
560                 v = (u32)(Z.hi);        PUTU32(p+4,v);
561                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
562                 v = (u32)(Z.lo);        PUTU32(p+12,v);
563 #endif
564         }
565         else {
566                 Xi[0] = Z.hi;
567                 Xi[1] = Z.lo;
568         }
569     } while (inp+=16, len-=16);
570 }
571 #endif
572 #else
573 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
574 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
575 #endif
576
577 #define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
578 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
579 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
580 /* GHASH_CHUNK is a "stride parameter" intended to mitigate the cache-
581  * thrashing effect. In other words, the idea is to hash data while it's
582  * still in the L1 cache after the encryption pass... */
583 #define GHASH_CHUNK       (3*1024)
584 #endif
585
586 #else   /* TABLE_BITS */
587
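/*
 * Bit-serial fallback (TABLE_BITS==1): for every bit of Xi the current
 * multiple of H is conditionally accumulated and then advanced with
 * REDUCE1BIT(). No per-key table is required, at a considerable cost in
 * speed.
 */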
588 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
589 {
590         u128 V,Z = { 0,0 };
591         long X;
592         int  i,j;
593         const long *xi = (const long *)Xi;
594         const union { long one; char little; } is_endian = {1};
595
596         V.hi = H[0];    /* H is in host byte order, no byte swapping */
597         V.lo = H[1];
598
599         for (j=0; j<16/sizeof(long); ++j) {
600                 if (is_endian.little) {
601                         if (sizeof(long)==8) {
602 #ifdef BSWAP8
603                                 X = (long)(BSWAP8(xi[j]));
604 #else
605                                 const u8 *p = (const u8 *)(xi+j);
606                                 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
607 #endif
608                         }
609                         else {
610                                 const u8 *p = (const u8 *)(xi+j);
611                                 X = (long)GETU32(p);
612                         }
613                 }
614                 else
615                         X = xi[j];
616
617                 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
618                         u64 M = (u64)(X>>(8*sizeof(long)-1));
619                         Z.hi ^= V.hi&M;
620                         Z.lo ^= V.lo&M;
621
622                         REDUCE1BIT(V);
623                 }
624         }
625
626         if (is_endian.little) {
627 #ifdef BSWAP8
628                 Xi[0] = BSWAP8(Z.hi);
629                 Xi[1] = BSWAP8(Z.lo);
630 #else
631                 u8 *p = (u8 *)Xi;
632                 u32 v;
633                 v = (u32)(Z.hi>>32);    PUTU32(p,v);
634                 v = (u32)(Z.hi);        PUTU32(p+4,v);
635                 v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
636                 v = (u32)(Z.lo);        PUTU32(p+12,v);
637 #endif
638         }
639         else {
640                 Xi[0] = Z.hi;
641                 Xi[1] = Z.lo;
642         }
643 }
644 #define GCM_MUL(ctx,Xi)   gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
645
646 #endif
647
648 #if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
649 # if    !defined(I386_ONLY) && \
650         (defined(__i386)        || defined(__i386__)    || \
651          defined(__x86_64)      || defined(__x86_64__)  || \
652          defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
653 #  define GHASH_ASM_X86_OR_64
654 #  define GCM_FUNCREF_4BIT
655 extern unsigned int OPENSSL_ia32cap_P[2];
656
657 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
658 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
659 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
660
661 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
662 # define gcm_init_avx   gcm_init_clmul
663 # define gcm_gmult_avx  gcm_gmult_clmul
664 # define gcm_ghash_avx  gcm_ghash_clmul
665 #else
666 void gcm_init_avx(u128 Htable[16],const u64 Xi[2]);
667 void gcm_gmult_avx(u64 Xi[2],const u128 Htable[16]);
668 void gcm_ghash_avx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
669 #endif
670
671 #  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
672 #   define GHASH_ASM_X86
673 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
674 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
675
676 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
677 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
678 #  endif
679 # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
680 #  include "arm_arch.h"
681 #  if __ARM_ARCH__>=7
682 #   define GHASH_ASM_ARM
683 #   define GCM_FUNCREF_4BIT
684 #   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
685 #   if defined(__arm__) || defined(__arm)
686 #    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
687 #   endif
688 void gcm_init_neon(u128 Htable[16],const u64 Xi[2]);
689 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
690 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
691 void gcm_init_v8(u128 Htable[16],const u64 Xi[2]);
692 void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]);
693 void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
694 #  endif
695 # elif defined(__sparc__) || defined(__sparc)
696 #  include "sparc_arch.h"
697 #  define GHASH_ASM_SPARC
698 #  define GCM_FUNCREF_4BIT
699 extern unsigned int OPENSSL_sparcv9cap_P[];
700 void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
701 void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
702 void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
703 # endif
704 #endif
705
706 #ifdef GCM_FUNCREF_4BIT
707 # undef  GCM_MUL
708 # define GCM_MUL(ctx,Xi)        (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
709 # ifdef GHASH
710 #  undef  GHASH
711 #  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
712 # endif
713 #endif
714
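/*
 * CRYPTO_gcm128_init derives the hash key H by encrypting the all-zero block
 * with the supplied cipher, converts it to host byte order and then selects
 * the GHASH implementation (filling Htable) according to the compile-time
 * configuration and, where assembler support is compiled in, run-time CPU
 * capabilities.
 */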
715 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
716 {
717         const union { long one; char little; } is_endian = {1};
718
719         memset(ctx,0,sizeof(*ctx));
720         ctx->block = block;
721         ctx->key   = key;
722
723         (*block)(ctx->H.c,ctx->H.c,key);
724
725         if (is_endian.little) {
726                 /* H is stored in host byte order */
727 #ifdef BSWAP8
728                 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
729                 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
730 #else
731                 u8 *p = ctx->H.c;
732                 u64 hi,lo;
733                 hi = (u64)GETU32(p)  <<32|GETU32(p+4);
734                 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
735                 ctx->H.u[0] = hi;
736                 ctx->H.u[1] = lo;
737 #endif
738         }
739
740 #if     TABLE_BITS==8
741         gcm_init_8bit(ctx->Htable,ctx->H.u);
742 #elif   TABLE_BITS==4
743 # if    defined(GHASH_ASM_X86_OR_64)
744 #  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
745         if (OPENSSL_ia32cap_P[0]&(1<<24) &&     /* check FXSR bit */
746             OPENSSL_ia32cap_P[1]&(1<<1) ) {     /* check PCLMULQDQ bit */
747                 if (((OPENSSL_ia32cap_P[1]>>22)&0x41)==0x41) {  /* AVX+MOVBE */
748                         gcm_init_avx(ctx->Htable,ctx->H.u);
749                         ctx->gmult = gcm_gmult_avx;
750                         ctx->ghash = gcm_ghash_avx;
751                 } else {
752                         gcm_init_clmul(ctx->Htable,ctx->H.u);
753                         ctx->gmult = gcm_gmult_clmul;
754                         ctx->ghash = gcm_ghash_clmul;
755                 }
756                 return;
757         }
758 #  endif
759         gcm_init_4bit(ctx->Htable,ctx->H.u);
760 #  if   defined(GHASH_ASM_X86)                  /* x86 only */
761 #   if  defined(OPENSSL_IA32_SSE2)
762         if (OPENSSL_ia32cap_P[0]&(1<<25)) {     /* check SSE bit */
763 #   else
764         if (OPENSSL_ia32cap_P[0]&(1<<23)) {     /* check MMX bit */
765 #   endif
766                 ctx->gmult = gcm_gmult_4bit_mmx;
767                 ctx->ghash = gcm_ghash_4bit_mmx;
768         } else {
769                 ctx->gmult = gcm_gmult_4bit_x86;
770                 ctx->ghash = gcm_ghash_4bit_x86;
771         }
772 #  else
773         ctx->gmult = gcm_gmult_4bit;
774         ctx->ghash = gcm_ghash_4bit;
775 #  endif
776 # elif  defined(GHASH_ASM_ARM)
777 #  ifdef PMULL_CAPABLE
778         if (PMULL_CAPABLE) {
779                 gcm_init_v8(ctx->Htable,ctx->H.u);
780                 ctx->gmult = gcm_gmult_v8;
781                 ctx->ghash = gcm_ghash_v8;
782         } else
783 #  endif
784 #  ifdef NEON_CAPABLE
785         if (NEON_CAPABLE) {
786                 gcm_init_neon(ctx->Htable,ctx->H.u);
787                 ctx->gmult = gcm_gmult_neon;
788                 ctx->ghash = gcm_ghash_neon;
789         } else
790 #  endif
791         {
792                 gcm_init_4bit(ctx->Htable,ctx->H.u);
793                 ctx->gmult = gcm_gmult_4bit;
794                 ctx->ghash = gcm_ghash_4bit;
795         }
796 # elif  defined(GHASH_ASM_SPARC)
797         if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
798                 gcm_init_vis3(ctx->Htable,ctx->H.u);
799                 ctx->gmult = gcm_gmult_vis3;
800                 ctx->ghash = gcm_ghash_vis3;
801         } else {
802                 gcm_init_4bit(ctx->Htable,ctx->H.u);
803                 ctx->gmult = gcm_gmult_4bit;
804                 ctx->ghash = gcm_ghash_4bit;
805         }
806 # else
807         gcm_init_4bit(ctx->Htable,ctx->H.u);
808 # endif
809 #endif
810 }
811
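/*
 * Typical use of this low-level interface (a sketch with illustrative buffer
 * names, assuming an AES key schedule `aes' prepared with
 * AES_set_encrypt_key elsewhere; error checking omitted):
 */
#if 0
        GCM128_CONTEXT gctx;

        CRYPTO_gcm128_init(&gctx, &aes, (block128_f)AES_encrypt);
        CRYPTO_gcm128_setiv(&gctx, iv, iv_len);
        CRYPTO_gcm128_aad(&gctx, aad, aad_len);         /* optional */
        CRYPTO_gcm128_encrypt(&gctx, plaintext, ciphertext, plaintext_len);
        CRYPTO_gcm128_tag(&gctx, tag, 16);              /* 16-byte tag */
#endif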
812 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
813 {
814         const union { long one; char little; } is_endian = {1};
815         unsigned int ctr;
816 #ifdef GCM_FUNCREF_4BIT
817         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
818 #endif
819
820         ctx->Yi.u[0]  = 0;
821         ctx->Yi.u[1]  = 0;
822         ctx->Xi.u[0]  = 0;
823         ctx->Xi.u[1]  = 0;
824         ctx->len.u[0] = 0;      /* AAD length */
825         ctx->len.u[1] = 0;      /* message length */
826         ctx->ares = 0;
827         ctx->mres = 0;
828
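        /*
         * Per the GCM specification a 96-bit IV is used directly as the
         * initial counter block, Y0 = IV || 0^31 || 1; any other length is
         * hashed instead: Y0 = GHASH(IV padded to a 16-byte boundary,
         * followed by the 64-bit IV length in bits).
         */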
829         if (len==12) {
830                 memcpy(ctx->Yi.c,iv,12);
831                 ctx->Yi.c[15]=1;
832                 ctr=1;
833         }
834         else {
835                 size_t i;
836                 u64 len0 = len;
837
838                 while (len>=16) {
839                         for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
840                         GCM_MUL(ctx,Yi);
841                         iv += 16;
842                         len -= 16;
843                 }
844                 if (len) {
845                         for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
846                         GCM_MUL(ctx,Yi);
847                 }
848                 len0 <<= 3;
849                 if (is_endian.little) {
850 #ifdef BSWAP8
851                         ctx->Yi.u[1]  ^= BSWAP8(len0);
852 #else
853                         ctx->Yi.c[8]  ^= (u8)(len0>>56);
854                         ctx->Yi.c[9]  ^= (u8)(len0>>48);
855                         ctx->Yi.c[10] ^= (u8)(len0>>40);
856                         ctx->Yi.c[11] ^= (u8)(len0>>32);
857                         ctx->Yi.c[12] ^= (u8)(len0>>24);
858                         ctx->Yi.c[13] ^= (u8)(len0>>16);
859                         ctx->Yi.c[14] ^= (u8)(len0>>8);
860                         ctx->Yi.c[15] ^= (u8)(len0);
861 #endif
862                 }
863                 else
864                         ctx->Yi.u[1]  ^= len0;
865
866                 GCM_MUL(ctx,Yi);
867
868                 if (is_endian.little)
869 #ifdef BSWAP4
870                         ctr = BSWAP4(ctx->Yi.d[3]);
871 #else
872                         ctr = GETU32(ctx->Yi.c+12);
873 #endif
874                 else
875                         ctr = ctx->Yi.d[3];
876         }
877
878         (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
879         ++ctr;
880         if (is_endian.little)
881 #ifdef BSWAP4
882                 ctx->Yi.d[3] = BSWAP4(ctr);
883 #else
884                 PUTU32(ctx->Yi.c+12,ctr);
885 #endif
886         else
887                 ctx->Yi.d[3] = ctr;
888 }
889
890 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
891 {
892         size_t i;
893         unsigned int n;
894         u64 alen = ctx->len.u[0];
895 #ifdef GCM_FUNCREF_4BIT
896         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
897 # ifdef GHASH
898         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
899                                 const u8 *inp,size_t len)       = ctx->ghash;
900 # endif
901 #endif
902
903         if (ctx->len.u[1]) return -2;
904
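        /*
         * No more AAD may be supplied once en/decryption has started (the
         * -2 above). The total AAD length is capped at 2^61 bytes (2^64
         * bits); the second clause below catches overflow of the running
         * total.
         */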
905         alen += len;
906         if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
907                 return -1;
908         ctx->len.u[0] = alen;
909
910         n = ctx->ares;
911         if (n) {
912                 while (n && len) {
913                         ctx->Xi.c[n] ^= *(aad++);
914                         --len;
915                         n = (n+1)%16;
916                 }
917                 if (n==0) GCM_MUL(ctx,Xi);
918                 else {
919                         ctx->ares = n;
920                         return 0;
921                 }
922         }
923
924 #ifdef GHASH
925         if ((i = (len&(size_t)-16))) {
926                 GHASH(ctx,aad,i);
927                 aad += i;
928                 len -= i;
929         }
930 #else
931         while (len>=16) {
932                 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
933                 GCM_MUL(ctx,Xi);
934                 aad += 16;
935                 len -= 16;
936         }
937 #endif
938         if (len) {
939                 n = (unsigned int)len;
940                 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
941         }
942
943         ctx->ares = n;
944         return 0;
945 }
946
947 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
948                 const unsigned char *in, unsigned char *out,
949                 size_t len)
950 {
951         const union { long one; char little; } is_endian = {1};
952         unsigned int n, ctr;
953         size_t i;
954         u64        mlen  = ctx->len.u[1];
955         block128_f block = ctx->block;
956         void      *key   = ctx->key;
957 #ifdef GCM_FUNCREF_4BIT
958         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
959 # ifdef GHASH
960         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
961                                 const u8 *inp,size_t len)       = ctx->ghash;
962 # endif
963 #endif
964
965 #if 0
966         n = (unsigned int)mlen%16; /* alternative to ctx->mres */
967 #endif
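        /*
         * The GCM specification caps the plaintext at 2^39-256 bits, i.e.
         * 2^36-32 bytes; the second clause below catches overflow of the
         * running length.
         */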
968         mlen += len;
969         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
970                 return -1;
971         ctx->len.u[1] = mlen;
972
973         if (ctx->ares) {
974                 /* First call to encrypt finalizes GHASH(AAD) */
975                 GCM_MUL(ctx,Xi);
976                 ctx->ares = 0;
977         }
978
979         if (is_endian.little)
980 #ifdef BSWAP4
981                 ctr = BSWAP4(ctx->Yi.d[3]);
982 #else
983                 ctr = GETU32(ctx->Yi.c+12);
984 #endif
985         else
986                 ctr = ctx->Yi.d[3];
987
988         n = ctx->mres;
989 #if !defined(OPENSSL_SMALL_FOOTPRINT)
990         if (16%sizeof(size_t) == 0) do {        /* always true actually */
991                 if (n) {
992                         while (n && len) {
993                                 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
994                                 --len;
995                                 n = (n+1)%16;
996                         }
997                         if (n==0) GCM_MUL(ctx,Xi);
998                         else {
999                                 ctx->mres = n;
1000                                 return 0;
1001                         }
1002                 }
1003 #if defined(STRICT_ALIGNMENT)
1004                 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1005                         break;
1006 #endif
1007 #if defined(GHASH) && defined(GHASH_CHUNK)
1008                 while (len>=GHASH_CHUNK) {
1009                     size_t j=GHASH_CHUNK;
1010
1011                     while (j) {
1012                         size_t *out_t=(size_t *)out;
1013                         const size_t *in_t=(const size_t *)in;
1014
1015                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1016                         ++ctr;
1017                         if (is_endian.little)
1018 #ifdef BSWAP4
1019                                 ctx->Yi.d[3] = BSWAP4(ctr);
1020 #else
1021                                 PUTU32(ctx->Yi.c+12,ctr);
1022 #endif
1023                         else
1024                                 ctx->Yi.d[3] = ctr;
1025                         for (i=0; i<16/sizeof(size_t); ++i)
1026                                 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1027                         out += 16;
1028                         in  += 16;
1029                         j   -= 16;
1030                     }
1031                     GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
1032                     len -= GHASH_CHUNK;
1033                 }
1034                 if ((i = (len&(size_t)-16))) {
1035                     size_t j=i;
1036
1037                     while (len>=16) {
1038                         size_t *out_t=(size_t *)out;
1039                         const size_t *in_t=(const size_t *)in;
1040
1041                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1042                         ++ctr;
1043                         if (is_endian.little)
1044 #ifdef BSWAP4
1045                                 ctx->Yi.d[3] = BSWAP4(ctr);
1046 #else
1047                                 PUTU32(ctx->Yi.c+12,ctr);
1048 #endif
1049                         else
1050                                 ctx->Yi.d[3] = ctr;
1051                         for (i=0; i<16/sizeof(size_t); ++i)
1052                                 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1053                         out += 16;
1054                         in  += 16;
1055                         len -= 16;
1056                     }
1057                     GHASH(ctx,out-j,j);
1058                 }
1059 #else
1060                 while (len>=16) {
1061                         size_t *out_t=(size_t *)out;
1062                         const size_t *in_t=(const size_t *)in;
1063
1064                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1065                         ++ctr;
1066                         if (is_endian.little)
1067 #ifdef BSWAP4
1068                                 ctx->Yi.d[3] = BSWAP4(ctr);
1069 #else
1070                                 PUTU32(ctx->Yi.c+12,ctr);
1071 #endif
1072                         else
1073                                 ctx->Yi.d[3] = ctr;
1074                         for (i=0; i<16/sizeof(size_t); ++i)
1075                                 ctx->Xi.t[i] ^=
1076                                 out_t[i] = in_t[i]^ctx->EKi.t[i];
1077                         GCM_MUL(ctx,Xi);
1078                         out += 16;
1079                         in  += 16;
1080                         len -= 16;
1081                 }
1082 #endif
1083                 if (len) {
1084                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1085                         ++ctr;
1086                         if (is_endian.little)
1087 #ifdef BSWAP4
1088                                 ctx->Yi.d[3] = BSWAP4(ctr);
1089 #else
1090                                 PUTU32(ctx->Yi.c+12,ctr);
1091 #endif
1092                         else
1093                                 ctx->Yi.d[3] = ctr;
1094                         while (len--) {
1095                                 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1096                                 ++n;
1097                         }
1098                 }
1099
1100                 ctx->mres = n;
1101                 return 0;
1102         } while(0);
1103 #endif
1104         for (i=0;i<len;++i) {
1105                 if (n==0) {
1106                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1107                         ++ctr;
1108                         if (is_endian.little)
1109 #ifdef BSWAP4
1110                                 ctx->Yi.d[3] = BSWAP4(ctr);
1111 #else
1112                                 PUTU32(ctx->Yi.c+12,ctr);
1113 #endif
1114                         else
1115                                 ctx->Yi.d[3] = ctr;
1116                 }
1117                 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
1118                 n = (n+1)%16;
1119                 if (n==0)
1120                         GCM_MUL(ctx,Xi);
1121         }
1122
1123         ctx->mres = n;
1124         return 0;
1125 }
1126
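/*
 * Decryption counterpart of CRYPTO_gcm128_encrypt: here it is the
 * ciphertext, i.e. the input, that gets folded into the GHASH state.
 */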
1127 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1128                 const unsigned char *in, unsigned char *out,
1129                 size_t len)
1130 {
1131         const union { long one; char little; } is_endian = {1};
1132         unsigned int n, ctr;
1133         size_t i;
1134         u64        mlen  = ctx->len.u[1];
1135         block128_f block = ctx->block;
1136         void      *key   = ctx->key;
1137 #ifdef GCM_FUNCREF_4BIT
1138         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
1139 # ifdef GHASH
1140         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1141                                 const u8 *inp,size_t len)       = ctx->ghash;
1142 # endif
1143 #endif
1144
1145         mlen += len;
1146         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1147                 return -1;
1148         ctx->len.u[1] = mlen;
1149
1150         if (ctx->ares) {
1151                 /* First call to decrypt finalizes GHASH(AAD) */
1152                 GCM_MUL(ctx,Xi);
1153                 ctx->ares = 0;
1154         }
1155
1156         if (is_endian.little)
1157 #ifdef BSWAP4
1158                 ctr = BSWAP4(ctx->Yi.d[3]);
1159 #else
1160                 ctr = GETU32(ctx->Yi.c+12);
1161 #endif
1162         else
1163                 ctr = ctx->Yi.d[3];
1164
1165         n = ctx->mres;
1166 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1167         if (16%sizeof(size_t) == 0) do {        /* always true actually */
1168                 if (n) {
1169                         while (n && len) {
1170                                 u8 c = *(in++);
1171                                 *(out++) = c^ctx->EKi.c[n];
1172                                 ctx->Xi.c[n] ^= c;
1173                                 --len;
1174                                 n = (n+1)%16;
1175                         }
1176                         if (n==0) GCM_MUL (ctx,Xi);
1177                         else {
1178                                 ctx->mres = n;
1179                                 return 0;
1180                         }
1181                 }
1182 #if defined(STRICT_ALIGNMENT)
1183                 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1184                         break;
1185 #endif
1186 #if defined(GHASH) && defined(GHASH_CHUNK)
1187                 while (len>=GHASH_CHUNK) {
1188                     size_t j=GHASH_CHUNK;
1189
1190                     GHASH(ctx,in,GHASH_CHUNK);
1191                     while (j) {
1192                         size_t *out_t=(size_t *)out;
1193                         const size_t *in_t=(const size_t *)in;
1194
1195                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1196                         ++ctr;
1197                         if (is_endian.little)
1198 #ifdef BSWAP4
1199                                 ctx->Yi.d[3] = BSWAP4(ctr);
1200 #else
1201                                 PUTU32(ctx->Yi.c+12,ctr);
1202 #endif
1203                         else
1204                                 ctx->Yi.d[3] = ctr;
1205                         for (i=0; i<16/sizeof(size_t); ++i)
1206                                 out_t[i] = in_t[i]^ctx->EKi.t[i];
1207                         out += 16;
1208                         in  += 16;
1209                         j   -= 16;
1210                     }
1211                     len -= GHASH_CHUNK;
1212                 }
1213                 if ((i = (len&(size_t)-16))) {
1214                     GHASH(ctx,in,i);
1215                     while (len>=16) {
1216                         size_t *out_t=(size_t *)out;
1217                         const size_t *in_t=(const size_t *)in;
1218
1219                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1220                         ++ctr;
1221                         if (is_endian.little)
1222 #ifdef BSWAP4
1223                                 ctx->Yi.d[3] = BSWAP4(ctr);
1224 #else
1225                                 PUTU32(ctx->Yi.c+12,ctr);
1226 #endif
1227                         else
1228                                 ctx->Yi.d[3] = ctr;
1229                         for (i=0; i<16/sizeof(size_t); ++i)
1230                                 out_t[i] = in_t[i]^ctx->EKi.t[i];
1231                         out += 16;
1232                         in  += 16;
1233                         len -= 16;
1234                     }
1235                 }
1236 #else
1237                 while (len>=16) {
1238                         size_t *out_t=(size_t *)out;
1239                         const size_t *in_t=(const size_t *)in;
1240
1241                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1242                         ++ctr;
1243                         if (is_endian.little)
1244 #ifdef BSWAP4
1245                                 ctx->Yi.d[3] = BSWAP4(ctr);
1246 #else
1247                                 PUTU32(ctx->Yi.c+12,ctr);
1248 #endif
1249                         else
1250                                 ctx->Yi.d[3] = ctr;
1251                         for (i=0; i<16/sizeof(size_t); ++i) {
1252                                 size_t c = in_t[i];
1253                                 out_t[i] = c^ctx->EKi.t[i];
1254                                 ctx->Xi.t[i] ^= c;
1255                         }
1256                         GCM_MUL(ctx,Xi);
1257                         out += 16;
1258                         in  += 16;
1259                         len -= 16;
1260                 }
1261 #endif
1262                 if (len) {
1263                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1264                         ++ctr;
1265                         if (is_endian.little)
1266 #ifdef BSWAP4
1267                                 ctx->Yi.d[3] = BSWAP4(ctr);
1268 #else
1269                                 PUTU32(ctx->Yi.c+12,ctr);
1270 #endif
1271                         else
1272                                 ctx->Yi.d[3] = ctr;
1273                         while (len--) {
1274                                 u8 c = in[n];
1275                                 ctx->Xi.c[n] ^= c;
1276                                 out[n] = c^ctx->EKi.c[n];
1277                                 ++n;
1278                         }
1279                 }
1280
1281                 ctx->mres = n;
1282                 return 0;
1283         } while(0);
1284 #endif
1285         for (i=0;i<len;++i) {
1286                 u8 c;
1287                 if (n==0) {
1288                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1289                         ++ctr;
1290                         if (is_endian.little)
1291 #ifdef BSWAP4
1292                                 ctx->Yi.d[3] = BSWAP4(ctr);
1293 #else
1294                                 PUTU32(ctx->Yi.c+12,ctr);
1295 #endif
1296                         else
1297                                 ctx->Yi.d[3] = ctr;
1298                 }
1299                 c = in[i];
1300                 out[i] = c^ctx->EKi.c[n];
1301                 ctx->Xi.c[n] ^= c;
1302                 n = (n+1)%16;
1303                 if (n==0)
1304                         GCM_MUL(ctx,Xi);
1305         }
1306
1307         ctx->mres = n;
1308         return 0;
1309 }
1310
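/*
 * Variant of CRYPTO_gcm128_encrypt that takes a ctr128_f `stream' routine
 * which processes whole 16-byte blocks in counter mode (e.g. an assembler
 * AES-CTR implementation), so that bulk data can be pushed through
 * GHASH_CHUNK bytes at a time.
 */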
1311 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1312                 const unsigned char *in, unsigned char *out,
1313                 size_t len, ctr128_f stream)
1314 {
1315         const union { long one; char little; } is_endian = {1};
1316         unsigned int n, ctr;
1317         size_t i;
1318         u64   mlen = ctx->len.u[1];
1319         void *key  = ctx->key;
1320 #ifdef GCM_FUNCREF_4BIT
1321         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
1322 # ifdef GHASH
1323         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1324                                 const u8 *inp,size_t len)       = ctx->ghash;
1325 # endif
1326 #endif
1327
1328         mlen += len;
1329         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1330                 return -1;
1331         ctx->len.u[1] = mlen;
1332
1333         if (ctx->ares) {
1334                 /* First call to encrypt finalizes GHASH(AAD) */
1335                 GCM_MUL(ctx,Xi);
1336                 ctx->ares = 0;
1337         }
1338
1339         if (is_endian.little)
1340 #ifdef BSWAP4
1341                 ctr = BSWAP4(ctx->Yi.d[3]);
1342 #else
1343                 ctr = GETU32(ctx->Yi.c+12);
1344 #endif
1345         else
1346                 ctr = ctx->Yi.d[3];
1347
1348         n = ctx->mres;
1349         if (n) {
1350                 while (n && len) {
1351                         ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1352                         --len;
1353                         n = (n+1)%16;
1354                 }
1355                 if (n==0) GCM_MUL(ctx,Xi);
1356                 else {
1357                         ctx->mres = n;
1358                         return 0;
1359                 }
1360         }
1361 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1362         while (len>=GHASH_CHUNK) {
1363                 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1364                 ctr += GHASH_CHUNK/16;
1365                 if (is_endian.little)
1366 #ifdef BSWAP4
1367                         ctx->Yi.d[3] = BSWAP4(ctr);
1368 #else
1369                         PUTU32(ctx->Yi.c+12,ctr);
1370 #endif
1371                 else
1372                         ctx->Yi.d[3] = ctr;
1373                 GHASH(ctx,out,GHASH_CHUNK);
1374                 out += GHASH_CHUNK;
1375                 in  += GHASH_CHUNK;
1376                 len -= GHASH_CHUNK;
1377         }
1378 #endif
1379         if ((i = (len&(size_t)-16))) {
1380                 size_t j=i/16;
1381
1382                 (*stream)(in,out,j,key,ctx->Yi.c);
1383                 ctr += (unsigned int)j;
1384                 if (is_endian.little)
1385 #ifdef BSWAP4
1386                         ctx->Yi.d[3] = BSWAP4(ctr);
1387 #else
1388                         PUTU32(ctx->Yi.c+12,ctr);
1389 #endif
1390                 else
1391                         ctx->Yi.d[3] = ctr;
1392                 in  += i;
1393                 len -= i;
1394 #if defined(GHASH)
1395                 GHASH(ctx,out,i);
1396                 out += i;
1397 #else
1398                 while (j--) {
1399                         for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1400                         GCM_MUL(ctx,Xi);
1401                         out += 16;
1402                 }
1403 #endif
1404         }
1405         if (len) {
1406                 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1407                 ++ctr;
1408                 if (is_endian.little)
1409 #ifdef BSWAP4
1410                         ctx->Yi.d[3] = BSWAP4(ctr);
1411 #else
1412                         PUTU32(ctx->Yi.c+12,ctr);
1413 #endif
1414                 else
1415                         ctx->Yi.d[3] = ctr;
1416                 while (len--) {
1417                         ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1418                         ++n;
1419                 }
1420         }
1421
1422         ctx->mres = n;
1423         return 0;
1424 }
1425
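/*
 * Decryption counterpart of CRYPTO_gcm128_encrypt_ctr32; note that each
 * chunk is GHASH-ed (as ciphertext) before being run through the
 * counter-mode stream routine.
 */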
1426 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1427                 const unsigned char *in, unsigned char *out,
1428                 size_t len,ctr128_f stream)
1429 {
1430         const union { long one; char little; } is_endian = {1};
1431         unsigned int n, ctr;
1432         size_t i;
1433         u64   mlen = ctx->len.u[1];
1434         void *key  = ctx->key;
1435 #ifdef GCM_FUNCREF_4BIT
1436         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
1437 # ifdef GHASH
1438         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1439                                 const u8 *inp,size_t len)       = ctx->ghash;
1440 # endif
1441 #endif
1442
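        /* Enforce the same 2^36-32 byte message limit as the encrypt path. */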
1443         mlen += len;
1444         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1445                 return -1;
1446         ctx->len.u[1] = mlen;
1447
1448         if (ctx->ares) {
1449                 /* First call to decrypt finalizes GHASH(AAD) */
1450                 GCM_MUL(ctx,Xi);
1451                 ctx->ares = 0;
1452         }
1453
1454         if (is_endian.little)
1455 #ifdef BSWAP4
1456                 ctr = BSWAP4(ctx->Yi.d[3]);
1457 #else
1458                 ctr = GETU32(ctx->Yi.c+12);
1459 #endif
1460         else
1461                 ctr = ctx->Yi.d[3];
1462
1463         n = ctx->mres;
1464         if (n) {
1465                 while (n && len) {
1466                         u8 c = *(in++);
1467                         *(out++) = c^ctx->EKi.c[n];
1468                         ctx->Xi.c[n] ^= c;
1469                         --len;
1470                         n = (n+1)%16;
1471                 }
1472                 if (n==0) GCM_MUL(ctx,Xi);
1473                 else {
1474                         ctx->mres = n;
1475                         return 0;
1476                 }
1477         }
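        /*
         * Note the order relative to encryption: the ciphertext is fed to
         * GHASH *before* the ctr32 routine overwrites it, so in-place
         * decryption (in==out) stays correct.
         */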
1478 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1479         while (len>=GHASH_CHUNK) {
1480                 GHASH(ctx,in,GHASH_CHUNK);
1481                 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1482                 ctr += GHASH_CHUNK/16;
1483                 if (is_endian.little)
1484 #ifdef BSWAP4
1485                         ctx->Yi.d[3] = BSWAP4(ctr);
1486 #else
1487                         PUTU32(ctx->Yi.c+12,ctr);
1488 #endif
1489                 else
1490                         ctx->Yi.d[3] = ctr;
1491                 out += GHASH_CHUNK;
1492                 in  += GHASH_CHUNK;
1493                 len -= GHASH_CHUNK;
1494         }
1495 #endif
1496         if ((i = (len&(size_t)-16))) {
1497                 size_t j=i/16;
1498
1499 #if defined(GHASH)
1500                 GHASH(ctx,in,i);
1501 #else
1502                 while (j--) {
1503                         size_t k;
1504                         for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1505                         GCM_MUL(ctx,Xi);
1506                         in += 16;
1507                 }
1508                 j   = i/16;
1509                 in -= i;
1510 #endif
1511                 (*stream)(in,out,j,key,ctx->Yi.c);
1512                 ctr += (unsigned int)j;
1513                 if (is_endian.little)
1514 #ifdef BSWAP4
1515                         ctx->Yi.d[3] = BSWAP4(ctr);
1516 #else
1517                         PUTU32(ctx->Yi.c+12,ctr);
1518 #endif
1519                 else
1520                         ctx->Yi.d[3] = ctr;
1521                 out += i;
1522                 in  += i;
1523                 len -= i;
1524         }
1525         if (len) {
1526                 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1527                 ++ctr;
1528                 if (is_endian.little)
1529 #ifdef BSWAP4
1530                         ctx->Yi.d[3] = BSWAP4(ctr);
1531 #else
1532                         PUTU32(ctx->Yi.c+12,ctr);
1533 #endif
1534                 else
1535                         ctx->Yi.d[3] = ctr;
1536                 while (len--) {
1537                         u8 c = in[n];
1538                         ctx->Xi.c[n] ^= c;
1539                         out[n] = c^ctx->EKi.c[n];
1540                         ++n;
1541                 }
1542         }
1543
1544         ctx->mres = n;
1545         return 0;
1546 }
1547
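/*
 * Completes GHASH by folding in the bit lengths of the AAD and the
 * ciphertext, XORs in EK0 to form the authentication tag and compares
 * the first 'len' bytes of it with 'tag'.  Returns 0 on a match and
 * non-zero otherwise.
 */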
1548 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1549                         size_t len)
1550 {
1551         const union { long one; char little; } is_endian = {1};
1552         u64 alen = ctx->len.u[0]<<3;
1553         u64 clen = ctx->len.u[1]<<3;
1554 #ifdef GCM_FUNCREF_4BIT
1555         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
1556 #endif
1557
1558         if (ctx->mres || ctx->ares)
1559                 GCM_MUL(ctx,Xi);
1560
1561         if (is_endian.little) {
1562 #ifdef BSWAP8
1563                 alen = BSWAP8(alen);
1564                 clen = BSWAP8(clen);
1565 #else
1566                 u8 *p = ctx->len.c;
1567
1568                 ctx->len.u[0] = alen;
1569                 ctx->len.u[1] = clen;
1570
1571                 alen = (u64)GETU32(p)  <<32|GETU32(p+4);
1572                 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1573 #endif
1574         }
1575
1576         ctx->Xi.u[0] ^= alen;
1577         ctx->Xi.u[1] ^= clen;
1578         GCM_MUL(ctx,Xi);
1579
1580         ctx->Xi.u[0] ^= ctx->EK0.u[0];
1581         ctx->Xi.u[1] ^= ctx->EK0.u[1];
1582
1583         if (tag && len<=sizeof(ctx->Xi))
1584                 return memcmp(ctx->Xi.c,tag,len);
1585         else
1586                 return -1;
1587 }
1588
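/*
 * Produces (rather than verifies) the tag: runs the same finalization as
 * CRYPTO_gcm128_finish and copies out at most sizeof(ctx->Xi) bytes.
 */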
1589 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1590 {
1591         CRYPTO_gcm128_finish(ctx, NULL, 0);
1592         memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
1593 }
1594
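/*
 * Allocation helpers.  A minimal usage sketch (illustrative only, error
 * handling omitted; 'aeskey' stands for a caller-initialized AES_KEY):
 *
 *      GCM128_CONTEXT *gcm;
 *
 *      gcm = CRYPTO_gcm128_new(&aeskey,(block128_f)AES_encrypt);
 *      CRYPTO_gcm128_setiv(gcm,iv,sizeof(iv));
 *      CRYPTO_gcm128_aad(gcm,aad,sizeof(aad));
 *      CRYPTO_gcm128_encrypt(gcm,plaintext,ciphertext,len);
 *      CRYPTO_gcm128_tag(gcm,tag,16);
 *      CRYPTO_gcm128_release(gcm);
 */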
1595 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1596 {
1597         GCM128_CONTEXT *ret;
1598
1599         if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1600                 CRYPTO_gcm128_init(ret,key,block);
1601
1602         return ret;
1603 }
1604
1605 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1606 {
1607         if (ctx) {
1608                 OPENSSL_cleanse(ctx,sizeof(*ctx));
1609                 OPENSSL_free(ctx);
1610         }
1611 }
1612
1613 #if defined(SELFTEST)
1614 #include <stdio.h>
1615 #include <openssl/aes.h>
1616
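/*
 * The vectors below follow the test cases in the GCM specification:
 * cases 1-18 cover AES-128/192/256 with the standard plaintext, AAD and
 * IV variants, while cases 19 and 20 add corner cases (a long AAD and a
 * 64-byte IV that puts 0xff in the counter LSB).
 */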
1617 /* Test Case 1 */
1618 static const u8 K1[16],
1619                 *P1=NULL,
1620                 *A1=NULL,
1621                 IV1[12],
1622                 *C1=NULL,
1623                 T1[]=  {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
1624
1625 /* Test Case 2 */
1626 #define K2 K1
1627 #define A2 A1
1628 #define IV2 IV1
1629 static const u8 P2[16],
1630                 C2[]=  {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1631                 T2[]=  {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1632
1633 /* Test Case 3 */
1634 #define A3 A2
1635 static const u8 K3[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1636                 P3[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1637                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1638                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1639                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1640                 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1641                 C3[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1642                         0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1643                         0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1644                         0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1645                 T3[]=  {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
1646
1647 /* Test Case 4 */
1648 #define K4 K3
1649 #define IV4 IV3
1650 static const u8 P4[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1651                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1652                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1653                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1654                 A4[]=  {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1655                         0xab,0xad,0xda,0xd2},
1656                 C4[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1657                         0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1658                         0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1659                         0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1660                 T4[]=  {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1661
1662 /* Test Case 5 */
1663 #define K5 K4
1664 #define P5 P4
1665 #define A5 A4
1666 static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1667                 C5[]=  {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1668                         0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1669                         0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1670                         0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1671                 T5[]=  {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
1672
1673 /* Test Case 6 */
1674 #define K6 K5
1675 #define P6 P5
1676 #define A6 A5
1677 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1678                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1679                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1680                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1681                 C6[]=  {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1682                         0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1683                         0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1684                         0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1685                 T6[]=  {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1686
1687 /* Test Case 7 */
1688 static const u8 K7[24],
1689                 *P7=NULL,
1690                 *A7=NULL,
1691                 IV7[12],
1692                 *C7=NULL,
1693                 T7[]=  {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1694
1695 /* Test Case 8 */
1696 #define K8 K7
1697 #define IV8 IV7
1698 #define A8 A7
1699 static const u8 P8[16],
1700                 C8[]=  {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1701                 T8[]=  {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1702
1703 /* Test Case 9 */
1704 #define A9 A8
1705 static const u8 K9[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1706                         0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1707                 P9[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1708                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1709                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1710                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1711                 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1712                 C9[]=  {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1713                         0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1714                         0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1715                         0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1716                 T9[]=  {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1717
1718 /* Test Case 10 */
1719 #define K10 K9
1720 #define IV10 IV9
1721 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1722                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1723                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1724                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1725                 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1726                         0xab,0xad,0xda,0xd2},
1727                 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1728                         0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1729                         0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1730                         0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1731                 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1732
1733 /* Test Case 11 */
1734 #define K11 K10
1735 #define P11 P10
1736 #define A11 A10
1737 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1738                 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1739                         0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1740                         0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1741                         0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1742                 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1743
1744 /* Test Case 12 */
1745 #define K12 K11
1746 #define P12 P11
1747 #define A12 A11
1748 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1749                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1750                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1751                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1752                 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1753                         0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1754                         0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1755                         0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1756                 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1757
1758 /* Test Case 13 */
1759 static const u8 K13[32],
1760                 *P13=NULL,
1761                 *A13=NULL,
1762                 IV13[12],
1763                 *C13=NULL,
1764                 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1765
1766 /* Test Case 14 */
1767 #define K14 K13
1768 #define A14 A13
1769 static const u8 P14[16],
1770                 IV14[12],
1771                 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1772                 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1773
1774 /* Test Case 15 */
1775 #define A15 A14
1776 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1777                         0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1778                 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1779                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1780                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1781                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1782                 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1783                 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1784                         0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1785                         0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1786                         0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1787                 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1788
1789 /* Test Case 16 */
1790 #define K16 K15
1791 #define IV16 IV15
1792 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1793                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1794                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1795                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1796                 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1797                         0xab,0xad,0xda,0xd2},
1798                 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1799                         0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1800                         0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1801                         0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1802                 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1803
1804 /* Test Case 17 */
1805 #define K17 K16
1806 #define P17 P16
1807 #define A17 A16
1808 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1809                 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1810                         0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1811                         0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1812                         0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1813                 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1814
1815 /* Test Case 18 */
1816 #define K18 K17
1817 #define P18 P17
1818 #define A18 A17
1819 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1820                         0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1821                         0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1822                         0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1823                 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1824                         0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1825                         0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1826                         0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1827                 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1828
1829 /* Test Case 19 */
1830 #define K19 K1
1831 #define P19 P1
1832 #define IV19 IV1
1833 #define C19 C1
1834 static const u8 A19[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1835                         0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1836                         0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1837                         0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
1838                         0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1839                         0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1840                         0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1841                         0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1842                 T19[]= {0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92};
1843
1844 /* Test Case 20 */
1845 #define K20 K1
1846 #define A20 A1
1847 static const u8 IV20[64]={0xff,0xff,0xff,0xff}, /* this results in 0xff in counter LSB */
1848                 P20[288],
1849                 C20[]= {0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
1850                         0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
1851                         0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
1852                         0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
1853                         0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
1854                         0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
1855                         0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
1856                         0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
1857                         0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
1858                         0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
1859                         0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
1860                         0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
1861                         0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
1862                         0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
1863                         0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
1864                         0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
1865                         0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
1866                         0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c},
1867                 T20[]= {0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f};
1868
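/*
 * Runs test case 'n' in both directions: encrypt P##n and compare the
 * ciphertext and tag, then decrypt C##n and compare the recovered
 * plaintext and tag.  NULL plaintext/ciphertext/AAD pointers denote
 * empty inputs.
 */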
1869 #define TEST_CASE(n)    do {                                    \
1870         u8 out[sizeof(P##n)];                                   \
1871         AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);          \
1872         CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);  \
1873         CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));          \
1874         memset(out,0,sizeof(out));                              \
1875         if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));    \
1876         if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));     \
1877         if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||               \
1878             (C##n && memcmp(out,C##n,sizeof(out))))             \
1879                 ret++, printf ("encrypt test#%d failed.\n",n);  \
1880         CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));          \
1881         memset(out,0,sizeof(out));                              \
1882         if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));    \
1883         if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));     \
1884         if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||               \
1885             (P##n && memcmp(out,P##n,sizeof(out))))             \
1886                 ret++, printf ("decrypt test#%d failed.\n",n);  \
1887         } while(0)
1888
1889 int main()
1890 {
1891         GCM128_CONTEXT ctx;
1892         AES_KEY key;
1893         int ret=0;
1894
1895         TEST_CASE(1);
1896         TEST_CASE(2);
1897         TEST_CASE(3);
1898         TEST_CASE(4);
1899         TEST_CASE(5);
1900         TEST_CASE(6);
1901         TEST_CASE(7);
1902         TEST_CASE(8);
1903         TEST_CASE(9);
1904         TEST_CASE(10);
1905         TEST_CASE(11);
1906         TEST_CASE(12);
1907         TEST_CASE(13);
1908         TEST_CASE(14);
1909         TEST_CASE(15);
1910         TEST_CASE(16);
1911         TEST_CASE(17);
1912         TEST_CASE(18);
1913         TEST_CASE(19);
1914         TEST_CASE(20);
1915
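/*
 * Optional micro-benchmark: times CRYPTO_gcm128_encrypt and plain CTR
 * mode over a 1KB buffer in CPU cycles; the difference approximates the
 * per-byte cost of GHASH.  When a dedicated ghash routine is available
 * it is timed separately over 100 iterations.
 */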
1916 #ifdef OPENSSL_CPUID_OBJ
1917         {
1918         size_t start,gcm_t,ctr_t,OPENSSL_rdtsc();
1919         union { u64 u; u8 c[1024]; } buf;
1920         int i;
1921
1922         AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1923         CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1924         CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1925
1926         CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1927         start = OPENSSL_rdtsc();
1928         CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1929         gcm_t = OPENSSL_rdtsc() - start;
1930
1931         CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1932                         &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1933                         (block128_f)AES_encrypt);
1934         start = OPENSSL_rdtsc();
1935         CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1936                         &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1937                         (block128_f)AES_encrypt);
1938         ctr_t = OPENSSL_rdtsc() - start;
1939
1940         printf("%.2f-%.2f=%.2f\n",
1941                         gcm_t/(double)sizeof(buf),
1942                         ctr_t/(double)sizeof(buf),
1943                         (gcm_t-ctr_t)/(double)sizeof(buf));
1944 #ifdef GHASH
1945         {
1946         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1947                                 const u8 *inp,size_t len)       = ctx.ghash;
1948
1949         GHASH((&ctx),buf.c,sizeof(buf));
1950         start = OPENSSL_rdtsc();
1951         for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
1952         gcm_t = OPENSSL_rdtsc() - start;
1953         printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
1954         }
1955 #endif
1956         }
1957 #endif
1958
1959         return ret;
1960 }
1961 #endif