253746c5697ad039a721e465d216dbc521037be2
[openssl.git] / crypto / modes / gcm128.c
1 /* ====================================================================
2  * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer. 
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  *
16  * 3. All advertising materials mentioning features or use of this
17  *    software must display the following acknowledgment:
18  *    "This product includes software developed by the OpenSSL Project
19  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20  *
21  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22  *    endorse or promote products derived from this software without
23  *    prior written permission. For written permission, please contact
24  *    openssl-core@openssl.org.
25  *
26  * 5. Products derived from this software may not be called "OpenSSL"
27  *    nor may "OpenSSL" appear in their names without prior written
28  *    permission of the OpenSSL Project.
29  *
30  * 6. Redistributions of any form whatsoever must retain the following
31  *    acknowledgment:
32  *    "This product includes software developed by the OpenSSL Project
33  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34  *
35  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
39  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46  * OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  */
49
50 #define OPENSSL_FIPSAPI
51
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
54 #include <string.h>
55
56 #ifndef MODES_DEBUG
57 # ifndef NDEBUG
58 #  define NDEBUG
59 # endif
60 #endif
61 #include <assert.h>
62
63 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64 /* redefine, because alignment is ensured */
65 #undef  GETU32
66 #define GETU32(p)       BSWAP4(*(const u32 *)(p))
67 #undef  PUTU32
68 #define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
69 #endif
70
71 #define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
72 #define REDUCE1BIT(V)   do { \
73         if (sizeof(size_t)==8) { \
74                 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75                 V.lo  = (V.hi<<63)|(V.lo>>1); \
76                 V.hi  = (V.hi>>1 )^T; \
77         } \
78         else { \
79                 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80                 V.lo  = (V.hi<<63)|(V.lo>>1); \
81                 V.hi  = (V.hi>>1 )^((u64)T<<32); \
82         } \
83 } while(0)
84
85 /*
86  * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87  * never be set to 8. 8 is effectively reserved for testing purposes.
88  * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89  * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90  * whole spectrum of possible table driven implementations. Why? In
91  * non-"Shoup's" case memory access pattern is segmented in such manner,
92  * that it's trivial to see that cache timing information can reveal
93  * fair portion of intermediate hash value. Given that ciphertext is
94  * always available to attacker, it's possible for him to attempt to
95  * deduce secret parameter H and if successful, tamper with messages
96  * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97  * not as trivial, but there is no reason to believe that it's resistant
98  * to cache-timing attack. And the thing about "8-bit" implementation is
99  * that it consumes 16 (sixteen) times more memory, 4KB per individual
100  * key + 1KB shared. Well, on pros side it should be twice as fast as
101  * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102  * was observed to run ~75% faster, closer to 100% for commercial
103  * compilers... Yet "4-bit" procedure is preferred, because it's
104  * believed to provide better security-performance balance and adequate
105  * all-round performance. "All-round" refers to things like:
106  *
107  * - shorter setup time effectively improves overall timing for
108  *   handling short messages;
109  * - larger table allocation can become unbearable because of VM
110  *   subsystem penalties (for example on Windows large enough free
111  *   results in VM working set trimming, meaning that consequent
112  *   malloc would immediately incur working set expansion);
113  * - larger table has larger cache footprint, which can affect
114  *   performance of other code paths (not necessarily even from same
115  *   thread in Hyper-Threading world);
116  *
117  * Value of 1 is not appropriate for performance reasons.
118  */
119 #if     TABLE_BITS==8
120
121 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
122 {
123         int  i, j;
124         u128 V;
125
126         Htable[0].hi = 0;
127         Htable[0].lo = 0;
128         V.hi = H[0];
129         V.lo = H[1];
130
131         for (Htable[128]=V, i=64; i>0; i>>=1) {
132                 REDUCE1BIT(V);
133                 Htable[i] = V;
134         }
135
136         for (i=2; i<256; i<<=1) {
137                 u128 *Hi = Htable+i, H0 = *Hi;
138                 for (j=1; j<i; ++j) {
139                         Hi[j].hi = H0.hi^Htable[j].hi;
140                         Hi[j].lo = H0.lo^Htable[j].lo;
141                 }
142         }
143 }
144
/*
 * Multiply Xi by H (8-bit table flavour): Xi = Xi * H in GF(2^128).
 * Xi is read and written in big-endian byte order; Htable was built
 * by gcm_init_8bit. One input byte is consumed per loop iteration,
 * starting at the last byte (index 15) and moving towards index 0.
 */
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
	u128 Z = { 0, 0};
	const u8 *xi = (const u8 *)Xi+15;	/* start at last byte of Xi */
	size_t rem, n = *xi;
	const union { long one; char little; } is_endian = {1};
	/* Reduction constants for an 8-bit shift: rem_8bit[b] is the
	 * 16-bit fold-back value for dropped byte b, positioned in the
	 * size_t's top 16 bits by PACK(). */
	__fips_constseg
	static const size_t rem_8bit[256] = {
		PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
		PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
		PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
		PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
		PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
		PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
		PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
		PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
		PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
		PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
		PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
		PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
		PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
		PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
		PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
		PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
		PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
		PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
		PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
		PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
		PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
		PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
		PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
		PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
		PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
		PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
		PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
		PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
		PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
		PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
		PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
		PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
		PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
		PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
		PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
		PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
		PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
		PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
		PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
		PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
		PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
		PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
		PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
		PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
		PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
		PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
		PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
		PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
		PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
		PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
		PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
		PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
		PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
		PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
		PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
		PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
		PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
		PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
		PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
		PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
		PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
		PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
		PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
		PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };

	while (1) {
		/* Accumulate the table entry for the current byte. */
		Z.hi ^= Htable[n].hi;
		Z.lo ^= Htable[n].lo;

		if ((u8 *)Xi==xi)	break;	/* all 16 bytes consumed */

		n = *(--xi);

		/* Shift Z right by 8 bits and fold the dropped byte back
		 * in via the reduction table. */
		rem  = (size_t)Z.lo&0xff;
		Z.lo = (Z.hi<<56)|(Z.lo>>8);
		Z.hi = (Z.hi>>8);
		if (sizeof(size_t)==8)
			Z.hi ^= rem_8bit[rem];
		else
			/* 32-bit size_t: PACK() placed the value in the low
			 * word's top 16 bits, so lift it into Z.hi's top. */
			Z.hi ^= (u64)rem_8bit[rem]<<32;
	}

	/* Store Z back into Xi in big-endian byte order. */
	if (is_endian.little) {
#ifdef BSWAP8
		Xi[0] = BSWAP8(Z.hi);
		Xi[1] = BSWAP8(Z.lo);
#else
		u8 *p = (u8 *)Xi;
		u32 v;
		v = (u32)(Z.hi>>32);	PUTU32(p,v);
		v = (u32)(Z.hi);	PUTU32(p+4,v);
		v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
		v = (u32)(Z.lo);	PUTU32(p+12,v);
#endif
	}
	else {
		Xi[0] = Z.hi;
		Xi[1] = Z.lo;
	}
}
253 #define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
254
255 #elif   TABLE_BITS==4
256
/*
 * Populate the 16-entry ("4-bit Shoup") multiplication table for the
 * hash subkey H: on return Htable[i] holds i*H in GF(2^128). The
 * small-footprint variant uses loops; the default variant is the same
 * computation manually unrolled.
 */
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
	u128 V;
#if defined(OPENSSL_SMALL_FOOTPRINT)
	int  i;
#endif

	Htable[0].hi = 0;	/* 0*H is zero */
	Htable[0].lo = 0;
	V.hi = H[0];
	V.lo = H[1];

#if defined(OPENSSL_SMALL_FOOTPRINT)
	/* Power-of-two entries: Htable[8]=H; each halving of the index is
	 * one shift-and-reduce of V. */
	for (Htable[8]=V, i=4; i>0; i>>=1) {
		REDUCE1BIT(V);
		Htable[i] = V;
	}

	/* Remaining entries by XOR (addition in GF(2^128)) of the
	 * power-of-two entries. */
	for (i=2; i<16; i<<=1) {
		u128 *Hi = Htable+i;
		int   j;
		for (V=*Hi, j=1; j<i; ++j) {
			Hi[j].hi = V.hi^Htable[j].hi;
			Hi[j].lo = V.lo^Htable[j].lo;
		}
	}
#else
	/* Unrolled equivalent of the loops above. */
	Htable[8] = V;
	REDUCE1BIT(V);
	Htable[4] = V;
	REDUCE1BIT(V);
	Htable[2] = V;
	REDUCE1BIT(V);
	Htable[1] = V;
	Htable[3].hi  = V.hi^Htable[2].hi, Htable[3].lo  = V.lo^Htable[2].lo;
	V=Htable[4];
	Htable[5].hi  = V.hi^Htable[1].hi, Htable[5].lo  = V.lo^Htable[1].lo;
	Htable[6].hi  = V.hi^Htable[2].hi, Htable[6].lo  = V.lo^Htable[2].lo;
	Htable[7].hi  = V.hi^Htable[3].hi, Htable[7].lo  = V.lo^Htable[3].lo;
	V=Htable[8];
	Htable[9].hi  = V.hi^Htable[1].hi, Htable[9].lo  = V.lo^Htable[1].lo;
	Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
	Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
	Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
	Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
	Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
	Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
#endif
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
	/*
	 * ARM assembler expects specific dword order in Htable:
	 * swap (little-endian) or rotate (big-endian) each entry.
	 */
	{
	int j;
	const union { long one; char little; } is_endian = {1};

	if (is_endian.little)
		for (j=0;j<16;++j) {
			V = Htable[j];
			Htable[j].hi = V.lo;
			Htable[j].lo = V.hi;
		}
	else
		for (j=0;j<16;++j) {
			V = Htable[j];
			Htable[j].hi = V.lo<<32|V.lo>>32;
			Htable[j].lo = V.hi<<32|V.hi>>32;
		}
	}
#endif
}
328
329 #ifndef GHASH_ASM
/* Reduction constants for a 4-bit shift: rem_4bit[n] is the 16-bit
 * fold-back value for dropped nibble n, pre-positioned in the top 16
 * bits of a size_t by PACK(). Shared by the generic 4-bit gmult/ghash
 * routines below. */
__fips_constseg
static const size_t rem_4bit[16] = {
	PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
	PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
	PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
	PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
336
/*
 * Multiply Xi by H (generic 4-bit table flavour): Xi = Xi * H in
 * GF(2^128). Xi is read and written big-endian; Htable was built by
 * gcm_init_4bit. Two nibbles are consumed per loop iteration, starting
 * with the low nibble of byte 15 and ending with byte 0.
 */
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
	u128 Z;
	int cnt = 15;
	size_t rem, nlo, nhi;
	const union { long one; char little; } is_endian = {1};

	/* Seed Z with the entry for the low nibble of the last byte. */
	nlo  = ((const u8 *)Xi)[15];
	nhi  = nlo>>4;
	nlo &= 0xf;

	Z.hi = Htable[nlo].hi;
	Z.lo = Htable[nlo].lo;

	while (1) {
		/* Shift Z right 4 bits, fold the dropped nibble back in
		 * via rem_4bit, then accumulate the high nibble's entry. */
		rem  = (size_t)Z.lo&0xf;
		Z.lo = (Z.hi<<60)|(Z.lo>>4);
		Z.hi = (Z.hi>>4);
		if (sizeof(size_t)==8)
			Z.hi ^= rem_4bit[rem];
		else
			/* 32-bit size_t: lift PACK()ed value to the top. */
			Z.hi ^= (u64)rem_4bit[rem]<<32;

		Z.hi ^= Htable[nhi].hi;
		Z.lo ^= Htable[nhi].lo;

		if (--cnt<0)		break;	/* byte 0 fully processed */

		nlo  = ((const u8 *)Xi)[cnt];
		nhi  = nlo>>4;
		nlo &= 0xf;

		/* Same shift-and-reduce for the next byte's low nibble. */
		rem  = (size_t)Z.lo&0xf;
		Z.lo = (Z.hi<<60)|(Z.lo>>4);
		Z.hi = (Z.hi>>4);
		if (sizeof(size_t)==8)
			Z.hi ^= rem_4bit[rem];
		else
			Z.hi ^= (u64)rem_4bit[rem]<<32;

		Z.hi ^= Htable[nlo].hi;
		Z.lo ^= Htable[nlo].lo;
	}

	/* Store Z back into Xi in big-endian byte order. */
	if (is_endian.little) {
#ifdef BSWAP8
		Xi[0] = BSWAP8(Z.hi);
		Xi[1] = BSWAP8(Z.lo);
#else
		u8 *p = (u8 *)Xi;
		u32 v;
		v = (u32)(Z.hi>>32);	PUTU32(p,v);
		v = (u32)(Z.hi);	PUTU32(p+4,v);
		v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
		v = (u32)(Z.lo);	PUTU32(p+12,v);
#endif
	}
	else {
		Xi[0] = Z.hi;
		Xi[1] = Z.lo;
	}
}
399
400 #if !defined(OPENSSL_SMALL_FOOTPRINT)
401 /*
402  * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
403  * details... Compiler-generated code doesn't seem to give any
404  * performance improvement, at least not on x86[_64]. It's here
405  * mostly as reference and a placeholder for possible future
406  * non-trivial optimization[s]...
407  */
/*
 * Streamed GHASH (generic 4-bit table flavour): for each 16-byte block
 * of inp, XOR it into Xi and multiply by H, i.e. Xi = (Xi ^ block) * H,
 * repeated over the whole input. Xi is big-endian throughout.
 *
 * NOTE(review): the do/while below decrements len by 16 with no
 * remainder handling — callers are expected to pass len as a positive
 * multiple of 16 (cf. the GHASH() call sites); confirm before reuse.
 */
static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    const union { long one; char little; } is_endian = {1};

#if 1
    do {
	/* Per-block multiply, identical to gcm_gmult_4bit except that
	 * each byte of Xi is XORed with the input block on the fly. */
	cnt  = 15;
	nlo  = ((const u8 *)Xi)[15];
	nlo ^= inp[15];
	nhi  = nlo>>4;
	nlo &= 0xf;

	Z.hi = Htable[nlo].hi;
	Z.lo = Htable[nlo].lo;

	while (1) {
		/* Shift Z right 4 bits, fold the dropped nibble back in
		 * via rem_4bit, accumulate the high nibble's entry. */
		rem  = (size_t)Z.lo&0xf;
		Z.lo = (Z.hi<<60)|(Z.lo>>4);
		Z.hi = (Z.hi>>4);
		if (sizeof(size_t)==8)
			Z.hi ^= rem_4bit[rem];
		else
			Z.hi ^= (u64)rem_4bit[rem]<<32;

		Z.hi ^= Htable[nhi].hi;
		Z.lo ^= Htable[nhi].lo;

		if (--cnt<0)		break;	/* byte 0 done */

		nlo  = ((const u8 *)Xi)[cnt];
		nlo ^= inp[cnt];
		nhi  = nlo>>4;
		nlo &= 0xf;

		rem  = (size_t)Z.lo&0xf;
		Z.lo = (Z.hi<<60)|(Z.lo>>4);
		Z.hi = (Z.hi>>4);
		if (sizeof(size_t)==8)
			Z.hi ^= rem_4bit[rem];
		else
			Z.hi ^= (u64)rem_4bit[rem]<<32;

		Z.hi ^= Htable[nlo].hi;
		Z.lo ^= Htable[nlo].lo;
	}
#else
    /*
     * Disabled alternative: processes a whole byte (both nibbles) per
     * inner iteration using pre-shifted copies of Htable and an 8-bit
     * reduction table.
     *
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
     * [should] give ~50% improvement... One could have PACK()-ed
     * the rem_8bit even here, but the priority is to minimize
     * cache footprint...
     */
    u128 Hshr4[16];	/* Htable shifted right by 4 bits */
    u8   Hshl4[16];	/* Htable shifted left  by 4 bits */
    __fips_constseg
    static const unsigned short rem_8bit[256] = {
	0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
	0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
	0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
	0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
	0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
	0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
	0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
	0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
	0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
	0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
	0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
	0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
	0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
	0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
	0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
	0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
	0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
	0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
	0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
	0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
	0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
	0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
	0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
	0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
	0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
	0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
	0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
	0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
	0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
	0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
	0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
	0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
    /*
     * This pre-processing phase slows down procedure by approximately
     * same time as it makes each loop spin faster. In other words
     * single block performance is approximately same as straightforward
     * "4-bit" implementation, and then it goes only faster...
     */
    for (cnt=0; cnt<16; ++cnt) {
	Z.hi = Htable[cnt].hi;
	Z.lo = Htable[cnt].lo;
	Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
	Hshr4[cnt].hi = (Z.hi>>4);
	Hshl4[cnt]    = (u8)(Z.lo<<4);
    }

    do {
	for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
		nlo  = ((const u8 *)Xi)[cnt];
		nlo ^= inp[cnt];
		nhi  = nlo>>4;
		nlo &= 0xf;

		Z.hi ^= Htable[nlo].hi;
		Z.lo ^= Htable[nlo].lo;

		rem = (size_t)Z.lo&0xff;

		Z.lo = (Z.hi<<56)|(Z.lo>>8);
		Z.hi = (Z.hi>>8);

		Z.hi ^= Hshr4[nhi].hi;
		Z.lo ^= Hshr4[nhi].lo;
		Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
	}

	/* Final byte (index 0): only a 4-bit shift remains. */
	nlo  = ((const u8 *)Xi)[0];
	nlo ^= inp[0];
	nhi  = nlo>>4;
	nlo &= 0xf;

	Z.hi ^= Htable[nlo].hi;
	Z.lo ^= Htable[nlo].lo;

	rem = (size_t)Z.lo&0xf;

	Z.lo = (Z.hi<<60)|(Z.lo>>4);
	Z.hi = (Z.hi>>4);

	Z.hi ^= Htable[nhi].hi;
	Z.lo ^= Htable[nhi].lo;
	Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
#endif

	/* Store the running Z back into Xi, big-endian, each block. */
	if (is_endian.little) {
#ifdef BSWAP8
		Xi[0] = BSWAP8(Z.hi);
		Xi[1] = BSWAP8(Z.lo);
#else
		u8 *p = (u8 *)Xi;
		u32 v;
		v = (u32)(Z.hi>>32);	PUTU32(p,v);
		v = (u32)(Z.hi);	PUTU32(p+4,v);
		v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
		v = (u32)(Z.lo);	PUTU32(p+12,v);
#endif
	}
	else {
		Xi[0] = Z.hi;
		Xi[1] = Z.lo;
	}
    } while (inp+=16, len-=16);
}
571 #endif
572 #else
573 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
574 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
575 #endif
576
577 #define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
578 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
579 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
580 /* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
581  * trashing effect. In other words idea is to hash data while it's
582  * still in L1 cache after encryption pass... */
583 #define GHASH_CHUNK       (3*1024)
584 #endif
585
586 #else   /* TABLE_BITS */
587
/*
 * Bit-by-bit GHASH multiplication (TABLE_BITS==1): Xi = Xi * H in
 * GF(2^128) with no lookup table. Xi is big-endian on input/output;
 * H is in host byte order. Each bit of Xi conditionally accumulates
 * the current power of H via a mask rather than a branch.
 */
static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
{
	u128 V,Z = { 0,0 };
	long X;
	int  i,j;
	const long *xi = (const long *)Xi;
	const union { long one; char little; } is_endian = {1};

	V.hi = H[0];	/* H is in host byte order, no byte swapping */
	V.lo = H[1];

	/* Walk Xi one machine word at a time, most significant byte
	 * first (the byte-swaps below load each word big-endian). */
	for (j=0; j<16/sizeof(long); ++j) {
		if (is_endian.little) {
			if (sizeof(long)==8) {
#ifdef BSWAP8
				X = (long)(BSWAP8(xi[j]));
#else
				const u8 *p = (const u8 *)(xi+j);
				X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
#endif
			}
			else {
				const u8 *p = (const u8 *)(xi+j);
				X = (long)GETU32(p);
			}
		}
		else
			X = xi[j];

		/* For each bit, MSB first: M is all-ones when the bit is
		 * set, all-zeros otherwise (sign bit replicated by the
		 * right shift), so Z ^= V happens without a data-dependent
		 * branch; V advances by one shift-and-reduce per bit. */
		for (i=0; i<8*sizeof(long); ++i, X<<=1) {
			u64 M = (u64)(X>>(8*sizeof(long)-1));
			Z.hi ^= V.hi&M;
			Z.lo ^= V.lo&M;

			REDUCE1BIT(V);
		}
	}

	/* Store Z back into Xi in big-endian byte order. */
	if (is_endian.little) {
#ifdef BSWAP8
		Xi[0] = BSWAP8(Z.hi);
		Xi[1] = BSWAP8(Z.lo);
#else
		u8 *p = (u8 *)Xi;
		u32 v;
		v = (u32)(Z.hi>>32);	PUTU32(p,v);
		v = (u32)(Z.hi);	PUTU32(p+4,v);
		v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
		v = (u32)(Z.lo);	PUTU32(p+12,v);
#endif
	}
	else {
		Xi[0] = Z.hi;
		Xi[1] = Z.lo;
	}
}
644 #define GCM_MUL(ctx,Xi)   gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
645
646 #endif
647
648 #if     TABLE_BITS==4 && defined(GHASH_ASM)
649 # if    !defined(I386_ONLY) && \
650         (defined(__i386)        || defined(__i386__)    || \
651          defined(__x86_64)      || defined(__x86_64__)  || \
652          defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
653 #  define GHASH_ASM_X86_OR_64
654 #  define GCM_FUNCREF_4BIT
655 extern unsigned int OPENSSL_ia32cap_P[2];
656
657 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
658 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
659 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
660
661 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
662 # define gcm_init_avx   gcm_init_clmul
663 # define gcm_gmult_avx  gcm_gmult_clmul
664 # define gcm_ghash_avx  gcm_ghash_clmul
665 #else
666 void gcm_init_avx(u128 Htable[16],const u64 Xi[2]);
667 void gcm_gmult_avx(u64 Xi[2],const u128 Htable[16]);
668 void gcm_ghash_avx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
669 #endif
670
671 #  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
672 #   define GHASH_ASM_X86
673 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
674 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
675
676 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
677 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
678 #  endif
679 # elif defined(__arm__) || defined(__arm)
680 #  include "arm_arch.h"
681 #  if __ARM_ARCH__>=7
682 #   define GHASH_ASM_ARM
683 #   define GCM_FUNCREF_4BIT
684 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
685 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
686 #  endif
687 # elif defined(__sparc__) || defined(__sparc)
688 #  include "sparc_arch.h"
689 #  define GHASH_ASM_SPARC
690 #  define GCM_FUNCREF_4BIT
691 extern unsigned int OPENSSL_sparcv9cap_P[];
692 void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
693 void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
694 void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
695 # endif
696 #endif
697
698 #ifdef GCM_FUNCREF_4BIT
699 # undef  GCM_MUL
700 # define GCM_MUL(ctx,Xi)        (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
701 # ifdef GHASH
702 #  undef  GHASH
703 #  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
704 # endif
705 #endif
706
/*
 * Initialise a GCM context: compute the hash subkey H = E_K(0^128)
 * with the supplied block cipher, convert H to host byte order,
 * pre-compute the GHASH multiplication table, and (where assembler
 * support is compiled in) select the fastest gmult/ghash routines the
 * running CPU supports.
 */
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
{
	const union { long one; char little; } is_endian = {1};

	memset(ctx,0,sizeof(*ctx));
	ctx->block = block;
	ctx->key   = key;

	/* H = E_K(0^128); ctx->H.c is all-zero after the memset above. */
	(*block)(ctx->H.c,ctx->H.c,key);

	if (is_endian.little) {
		/* H is stored in host byte order */
#ifdef BSWAP8
		ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
		ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
		u8 *p = ctx->H.c;
		u64 hi,lo;
		hi = (u64)GETU32(p)  <<32|GETU32(p+4);
		lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
		ctx->H.u[0] = hi;
		ctx->H.u[1] = lo;
#endif
	}

#if	TABLE_BITS==8
	gcm_init_8bit(ctx->Htable,ctx->H.u);
#elif	TABLE_BITS==4
# if	defined(GHASH_ASM_X86_OR_64)
#  if	!defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
	if (OPENSSL_ia32cap_P[0]&(1<<24) &&	/* check FXSR bit */
	    OPENSSL_ia32cap_P[1]&(1<<1) ) {	/* check PCLMULQDQ bit */
		if (((OPENSSL_ia32cap_P[1]>>22)&0x41)==0x41) {	/* AVX+MOVBE */
			gcm_init_avx(ctx->Htable,ctx->H.u);
			ctx->gmult = gcm_gmult_avx;
			ctx->ghash = gcm_ghash_avx;
		} else {
			gcm_init_clmul(ctx->Htable,ctx->H.u);
			ctx->gmult = gcm_gmult_clmul;
			ctx->ghash = gcm_ghash_clmul;
		}
		return;	/* carry-less-multiply path fully configured */
	}
#  endif
	/* Generic 4-bit table as fallback (also used by the MMX/x86
	 * assembler paths below). */
	gcm_init_4bit(ctx->Htable,ctx->H.u);
#  if	defined(GHASH_ASM_X86)			/* x86 only */
#   if	defined(OPENSSL_IA32_SSE2)
	if (OPENSSL_ia32cap_P[0]&(1<<25)) {	/* check SSE bit */
#   else
	if (OPENSSL_ia32cap_P[0]&(1<<23)) {	/* check MMX bit */
#   endif
		ctx->gmult = gcm_gmult_4bit_mmx;
		ctx->ghash = gcm_ghash_4bit_mmx;
	} else {
		ctx->gmult = gcm_gmult_4bit_x86;
		ctx->ghash = gcm_ghash_4bit_x86;
	}
#  else
	ctx->gmult = gcm_gmult_4bit;
	ctx->ghash = gcm_ghash_4bit;
#  endif
# elif	defined(GHASH_ASM_ARM)
	/* NOTE(review): the NEON branch installs gmult/ghash without
	 * calling gcm_init_4bit — presumably the NEON routines derive
	 * their tables differently; confirm against the ARM assembler. */
	if (OPENSSL_armcap_P & ARMV7_NEON) {
		ctx->gmult = gcm_gmult_neon;
		ctx->ghash = gcm_ghash_neon;
	} else {
		gcm_init_4bit(ctx->Htable,ctx->H.u);
		ctx->gmult = gcm_gmult_4bit;
		ctx->ghash = gcm_ghash_4bit;
	}
# elif	defined(GHASH_ASM_SPARC)
	if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
		gcm_init_vis3(ctx->Htable,ctx->H.u);
		ctx->gmult = gcm_gmult_vis3;
		ctx->ghash = gcm_ghash_vis3;
	} else {
		gcm_init_4bit(ctx->Htable,ctx->H.u);
		ctx->gmult = gcm_gmult_4bit;
		ctx->ghash = gcm_ghash_4bit;
	}
# else
	gcm_init_4bit(ctx->Htable,ctx->H.u);
# endif
#endif
}
792
/*
 * CRYPTO_gcm128_setiv installs |iv| (|len| bytes) as the pre-counter block
 * Y0 and resets all per-message state, so a context may be reused for a new
 * message.  A 96-bit IV is used directly as IV || 0^31 || 1; any other
 * length is absorbed through GHASH together with its bit length, per the
 * GCM specification.  EK0 = E_K(Y0) is saved for the final tag and Yi is
 * left holding Y1, the first counter block used for data.
 */
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
{
        const union { long one; char little; } is_endian = {1};
        unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
#endif

        /* Reset counter, GHASH accumulator, lengths and partial-block residues. */
        ctx->Yi.u[0]  = 0;
        ctx->Yi.u[1]  = 0;
        ctx->Xi.u[0]  = 0;
        ctx->Xi.u[1]  = 0;
        ctx->len.u[0] = 0;      /* AAD length */
        ctx->len.u[1] = 0;      /* message length */
        ctx->ares = 0;
        ctx->mres = 0;

        if (len==12) {
                /* Standard 96-bit IV: Y0 = IV || 0^31 || 1. */
                memcpy(ctx->Yi.c,iv,12);
                ctx->Yi.c[15]=1;
                ctr=1;
        }
        else {
                /* Y0 = GHASH(IV padded to a block boundary, then [len(IV)]_64). */
                size_t i;
                u64 len0 = len;

                while (len>=16) {
                        for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
                        GCM_MUL(ctx,Yi);
                        iv += 16;
                        len -= 16;
                }
                if (len) {
                        for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
                        GCM_MUL(ctx,Yi);
                }
                /* Fold in the IV length in bits as the final GHASH block. */
                len0 <<= 3;
                if (is_endian.little) {
#ifdef BSWAP8
                        ctx->Yi.u[1]  ^= BSWAP8(len0);
#else
                        ctx->Yi.c[8]  ^= (u8)(len0>>56);
                        ctx->Yi.c[9]  ^= (u8)(len0>>48);
                        ctx->Yi.c[10] ^= (u8)(len0>>40);
                        ctx->Yi.c[11] ^= (u8)(len0>>32);
                        ctx->Yi.c[12] ^= (u8)(len0>>24);
                        ctx->Yi.c[13] ^= (u8)(len0>>16);
                        ctx->Yi.c[14] ^= (u8)(len0>>8);
                        ctx->Yi.c[15] ^= (u8)(len0);
#endif
                }
                else
                        ctx->Yi.u[1]  ^= len0;

                GCM_MUL(ctx,Yi);

                /* Extract the low 32-bit counter word of Y0 in host order. */
                if (is_endian.little)
#ifdef BSWAP4
                        ctr = BSWAP4(ctx->Yi.d[3]);
#else
                        ctr = GETU32(ctx->Yi.c+12);
#endif
                else
                        ctr = ctx->Yi.d[3];
        }

        /* EK0 = E_K(Y0), reserved for masking the final authentication tag. */
        (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
        /* Advance to Y1, the first counter block used for the payload. */
        ++ctr;
        if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12,ctr);
#endif
        else
                ctx->Yi.d[3] = ctr;
}
870
/*
 * CRYPTO_gcm128_aad feeds |len| bytes of additional authenticated data
 * into the GHASH accumulator.  May be called multiple times, but only
 * before any payload has been en/decrypted.  Returns 0 on success, -1 if
 * the accumulated AAD length would exceed the GCM limit (2^64 bits), and
 * -2 if message data has already been processed.
 */
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
{
        size_t i;
        unsigned int n;
        u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
# ifdef GHASH
        void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                                const u8 *inp,size_t len)       = ctx->ghash;
# endif
#endif

        /* AAD must precede payload: non-zero message length means too late. */
        if (ctx->len.u[1]) return -2;

        /* 2^61 bytes == 2^64 bits; also reject 64-bit wrap-around. */
        alen += len;
        if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
                return -1;
        ctx->len.u[0] = alen;

        /* Complete a partial block left over from a previous call. */
        n = ctx->ares;
        if (n) {
                while (n && len) {
                        ctx->Xi.c[n] ^= *(aad++);
                        --len;
                        n = (n+1)%16;
                }
                if (n==0) GCM_MUL(ctx,Xi);
                else {
                        /* Still short of a full block: stash residue and return. */
                        ctx->ares = n;
                        return 0;
                }
        }

#ifdef GHASH
        /* Bulk-hash all whole 16-byte blocks in one call. */
        if ((i = (len&(size_t)-16))) {
                GHASH(ctx,aad,i);
                aad += i;
                len -= i;
        }
#else
        while (len>=16) {
                for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
                GCM_MUL(ctx,Xi);
                aad += 16;
                len -= 16;
        }
#endif
        /* XOR the trailing partial block into Xi; hashed on the next call
         * or when payload processing/finish flushes it. */
        if (len) {
                n = (unsigned int)len;
                for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
        }

        ctx->ares = n;
        return 0;
}
927
928 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
929                 const unsigned char *in, unsigned char *out,
930                 size_t len)
931 {
932         const union { long one; char little; } is_endian = {1};
933         unsigned int n, ctr;
934         size_t i;
935         u64        mlen  = ctx->len.u[1];
936         block128_f block = ctx->block;
937         void      *key   = ctx->key;
938 #ifdef GCM_FUNCREF_4BIT
939         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
940 # ifdef GHASH
941         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
942                                 const u8 *inp,size_t len)       = ctx->ghash;
943 # endif
944 #endif
945
946 #if 0
947         n = (unsigned int)mlen%16; /* alternative to ctx->mres */
948 #endif
949         mlen += len;
950         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
951                 return -1;
952         ctx->len.u[1] = mlen;
953
954         if (ctx->ares) {
955                 /* First call to encrypt finalizes GHASH(AAD) */
956                 GCM_MUL(ctx,Xi);
957                 ctx->ares = 0;
958         }
959
960         if (is_endian.little)
961 #ifdef BSWAP4
962                 ctr = BSWAP4(ctx->Yi.d[3]);
963 #else
964                 ctr = GETU32(ctx->Yi.c+12);
965 #endif
966         else
967                 ctr = ctx->Yi.d[3];
968
969         n = ctx->mres;
970 #if !defined(OPENSSL_SMALL_FOOTPRINT)
971         if (16%sizeof(size_t) == 0) do {        /* always true actually */
972                 if (n) {
973                         while (n && len) {
974                                 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
975                                 --len;
976                                 n = (n+1)%16;
977                         }
978                         if (n==0) GCM_MUL(ctx,Xi);
979                         else {
980                                 ctx->mres = n;
981                                 return 0;
982                         }
983                 }
984 #if defined(STRICT_ALIGNMENT)
985                 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
986                         break;
987 #endif
988 #if defined(GHASH) && defined(GHASH_CHUNK)
989                 while (len>=GHASH_CHUNK) {
990                     size_t j=GHASH_CHUNK;
991
992                     while (j) {
993                         size_t *out_t=(size_t *)out;
994                         const size_t *in_t=(const size_t *)in;
995
996                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
997                         ++ctr;
998                         if (is_endian.little)
999 #ifdef BSWAP4
1000                                 ctx->Yi.d[3] = BSWAP4(ctr);
1001 #else
1002                                 PUTU32(ctx->Yi.c+12,ctr);
1003 #endif
1004                         else
1005                                 ctx->Yi.d[3] = ctr;
1006                         for (i=0; i<16/sizeof(size_t); ++i)
1007                                 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1008                         out += 16;
1009                         in  += 16;
1010                         j   -= 16;
1011                     }
1012                     GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
1013                     len -= GHASH_CHUNK;
1014                 }
1015                 if ((i = (len&(size_t)-16))) {
1016                     size_t j=i;
1017
1018                     while (len>=16) {
1019                         size_t *out_t=(size_t *)out;
1020                         const size_t *in_t=(const size_t *)in;
1021
1022                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1023                         ++ctr;
1024                         if (is_endian.little)
1025 #ifdef BSWAP4
1026                                 ctx->Yi.d[3] = BSWAP4(ctr);
1027 #else
1028                                 PUTU32(ctx->Yi.c+12,ctr);
1029 #endif
1030                         else
1031                                 ctx->Yi.d[3] = ctr;
1032                         for (i=0; i<16/sizeof(size_t); ++i)
1033                                 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1034                         out += 16;
1035                         in  += 16;
1036                         len -= 16;
1037                     }
1038                     GHASH(ctx,out-j,j);
1039                 }
1040 #else
1041                 while (len>=16) {
1042                         size_t *out_t=(size_t *)out;
1043                         const size_t *in_t=(const size_t *)in;
1044
1045                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1046                         ++ctr;
1047                         if (is_endian.little)
1048 #ifdef BSWAP4
1049                                 ctx->Yi.d[3] = BSWAP4(ctr);
1050 #else
1051                                 PUTU32(ctx->Yi.c+12,ctr);
1052 #endif
1053                         else
1054                                 ctx->Yi.d[3] = ctr;
1055                         for (i=0; i<16/sizeof(size_t); ++i)
1056                                 ctx->Xi.t[i] ^=
1057                                 out_t[i] = in_t[i]^ctx->EKi.t[i];
1058                         GCM_MUL(ctx,Xi);
1059                         out += 16;
1060                         in  += 16;
1061                         len -= 16;
1062                 }
1063 #endif
1064                 if (len) {
1065                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1066                         ++ctr;
1067                         if (is_endian.little)
1068 #ifdef BSWAP4
1069                                 ctx->Yi.d[3] = BSWAP4(ctr);
1070 #else
1071                                 PUTU32(ctx->Yi.c+12,ctr);
1072 #endif
1073                         else
1074                                 ctx->Yi.d[3] = ctr;
1075                         while (len--) {
1076                                 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1077                                 ++n;
1078                         }
1079                 }
1080
1081                 ctx->mres = n;
1082                 return 0;
1083         } while(0);
1084 #endif
1085         for (i=0;i<len;++i) {
1086                 if (n==0) {
1087                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1088                         ++ctr;
1089                         if (is_endian.little)
1090 #ifdef BSWAP4
1091                                 ctx->Yi.d[3] = BSWAP4(ctr);
1092 #else
1093                                 PUTU32(ctx->Yi.c+12,ctr);
1094 #endif
1095                         else
1096                                 ctx->Yi.d[3] = ctr;
1097                 }
1098                 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
1099                 n = (n+1)%16;
1100                 if (n==0)
1101                         GCM_MUL(ctx,Xi);
1102         }
1103
1104         ctx->mres = n;
1105         return 0;
1106 }
1107
/*
 * CRYPTO_gcm128_decrypt decrypts |len| bytes from |in| to |out| in counter
 * mode.  The *ciphertext* is folded into GHASH (before decryption), so the
 * input byte must be captured before it is overwritten when operating
 * in place.  May be called multiple times; partial blocks are carried in
 * ctx->mres.  Returns 0 on success, -1 if the total message length would
 * exceed the GCM limit of 2^36 - 32 bytes.
 */
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                const unsigned char *in, unsigned char *out,
                size_t len)
{
        const union { long one; char little; } is_endian = {1};
        unsigned int n, ctr;
        size_t i;
        u64        mlen  = ctx->len.u[1];
        block128_f block = ctx->block;
        void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
# ifdef GHASH
        void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                                const u8 *inp,size_t len)       = ctx->ghash;
# endif
#endif

        /* Enforce NIST's 2^36-32 byte limit; also catch 64-bit wrap. */
        mlen += len;
        if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
                return -1;
        ctx->len.u[1] = mlen;

        if (ctx->ares) {
                /* First call to decrypt finalizes GHASH(AAD) */
                GCM_MUL(ctx,Xi);
                ctx->ares = 0;
        }

        /* Pull the 32-bit counter word out of Yi in host byte order. */
        if (is_endian.little)
#ifdef BSWAP4
                ctr = BSWAP4(ctx->Yi.d[3]);
#else
                ctr = GETU32(ctx->Yi.c+12);
#endif
        else
                ctr = ctx->Yi.d[3];

        n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
        if (16%sizeof(size_t) == 0) do {        /* always true actually */
                /* Drain keystream left over from a previous partial block;
                 * note the ciphertext byte is hashed, then decrypted. */
                if (n) {
                        while (n && len) {
                                u8 c = *(in++);
                                *(out++) = c^ctx->EKi.c[n];
                                ctx->Xi.c[n] ^= c;
                                --len;
                                n = (n+1)%16;
                        }
                        if (n==0) GCM_MUL (ctx,Xi);
                        else {
                                ctx->mres = n;
                                return 0;
                        }
                }
#if defined(STRICT_ALIGNMENT)
                /* Word-sized loads below require alignment; fall back to the
                 * byte-at-a-time loop at the bottom if pointers are odd. */
                if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
                        break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
                /* Fast path: hash the ciphertext chunk first (it is still
                 * intact even for in-place operation), then decrypt it. */
                while (len>=GHASH_CHUNK) {
                    size_t j=GHASH_CHUNK;

                    GHASH(ctx,in,GHASH_CHUNK);
                    while (j) {
                        size_t *out_t=(size_t *)out;
                        const size_t *in_t=(const size_t *)in;

                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
                                out_t[i] = in_t[i]^ctx->EKi.t[i];
                        out += 16;
                        in  += 16;
                        j   -= 16;
                    }
                    len -= GHASH_CHUNK;
                }
                /* Remaining whole blocks: hash, then decrypt. */
                if ((i = (len&(size_t)-16))) {
                    GHASH(ctx,in,i);
                    while (len>=16) {
                        size_t *out_t=(size_t *)out;
                        const size_t *in_t=(const size_t *)in;

                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
                                out_t[i] = in_t[i]^ctx->EKi.t[i];
                        out += 16;
                        in  += 16;
                        len -= 16;
                    }
                }
#else
                /* No bulk GHASH: hash and decrypt block by block. */
                while (len>=16) {
                        size_t *out_t=(size_t *)out;
                        const size_t *in_t=(const size_t *)in;

                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i) {
                                size_t c = in[i];
                                out[i] = c^ctx->EKi.t[i];
                                ctx->Xi.t[i] ^= c;
                        }
                        GCM_MUL(ctx,Xi);
                        out += 16;
                        in  += 16;
                        len -= 16;
                }
#endif
                /* Trailing partial block: one keystream block, residue in mres. */
                if (len) {
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        while (len--) {
                                u8 c = in[n];
                                ctx->Xi.c[n] ^= c;
                                out[n] = c^ctx->EKi.c[n];
                                ++n;
                        }
                }

                ctx->mres = n;
                return 0;
        } while(0);
#endif
        /* Portable byte-at-a-time fallback (small footprint / unaligned). */
        for (i=0;i<len;++i) {
                u8 c;
                if (n==0) {
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                }
                c = in[i];
                out[i] = c^ctx->EKi.c[n];
                ctx->Xi.c[n] ^= c;
                n = (n+1)%16;
                if (n==0)
                        GCM_MUL(ctx,Xi);
        }

        ctx->mres = n;
        return 0;
}
1291
/*
 * CRYPTO_gcm128_encrypt_ctr32 is like CRYPTO_gcm128_encrypt but delegates
 * bulk counter-mode encryption to |stream|, a ctr128_f that processes a
 * given number of whole 16-byte blocks with a 32-bit big-endian counter.
 * Partial blocks are handled here with ctx->block.  Returns 0 on success,
 * -1 if the total message length would exceed 2^36 - 32 bytes.
 */
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                const unsigned char *in, unsigned char *out,
                size_t len, ctr128_f stream)
{
        const union { long one; char little; } is_endian = {1};
        unsigned int n, ctr;
        size_t i;
        u64   mlen = ctx->len.u[1];
        void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
# ifdef GHASH
        void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                                const u8 *inp,size_t len)       = ctx->ghash;
# endif
#endif

        /* Enforce NIST's 2^36-32 byte limit; also catch 64-bit wrap. */
        mlen += len;
        if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
                return -1;
        ctx->len.u[1] = mlen;

        if (ctx->ares) {
                /* First call to encrypt finalizes GHASH(AAD) */
                GCM_MUL(ctx,Xi);
                ctx->ares = 0;
        }

        /* Pull the 32-bit counter word out of Yi in host byte order. */
        if (is_endian.little)
#ifdef BSWAP4
                ctr = BSWAP4(ctx->Yi.d[3]);
#else
                ctr = GETU32(ctx->Yi.c+12);
#endif
        else
                ctr = ctx->Yi.d[3];

        /* Drain keystream left over from a previous partial block. */
        n = ctx->mres;
        if (n) {
                while (n && len) {
                        ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
                        --len;
                        n = (n+1)%16;
                }
                if (n==0) GCM_MUL(ctx,Xi);
                else {
                        ctx->mres = n;
                        return 0;
                }
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        /* Fast path: stream-encrypt a chunk, then hash the ciphertext. */
        while (len>=GHASH_CHUNK) {
                (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
                ctr += GHASH_CHUNK/16;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                GHASH(ctx,out,GHASH_CHUNK);
                out += GHASH_CHUNK;
                in  += GHASH_CHUNK;
                len -= GHASH_CHUNK;
        }
#endif
        /* Remaining whole blocks. */
        if ((i = (len&(size_t)-16))) {
                size_t j=i/16;

                (*stream)(in,out,j,key,ctx->Yi.c);
                ctr += (unsigned int)j;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                in  += i;
                len -= i;
#if defined(GHASH)
                GHASH(ctx,out,i);
                out += i;
#else
                while (j--) {
                        for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
                        GCM_MUL(ctx,Xi);
                        out += 16;
                }
#endif
        }
        /* Trailing partial block via the raw block cipher; residue in mres. */
        if (len) {
                (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                while (len--) {
                        ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
                        ++n;
                }
        }

        ctx->mres = n;
        return 0;
}
1406
/*
 * CRYPTO_gcm128_decrypt_ctr32 is like CRYPTO_gcm128_decrypt but delegates
 * bulk counter-mode decryption to |stream|, a ctr128_f that processes a
 * given number of whole 16-byte blocks with a 32-bit big-endian counter.
 * Ciphertext is hashed before it is decrypted, so in-place operation is
 * safe.  Returns 0 on success, -1 if the total message length would
 * exceed 2^36 - 32 bytes.
 */
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                const unsigned char *in, unsigned char *out,
                size_t len,ctr128_f stream)
{
        const union { long one; char little; } is_endian = {1};
        unsigned int n, ctr;
        size_t i;
        u64   mlen = ctx->len.u[1];
        void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
# ifdef GHASH
        void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                                const u8 *inp,size_t len)       = ctx->ghash;
# endif
#endif

        /* Enforce NIST's 2^36-32 byte limit; also catch 64-bit wrap. */
        mlen += len;
        if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
                return -1;
        ctx->len.u[1] = mlen;

        if (ctx->ares) {
                /* First call to decrypt finalizes GHASH(AAD) */
                GCM_MUL(ctx,Xi);
                ctx->ares = 0;
        }

        /* Pull the 32-bit counter word out of Yi in host byte order. */
        if (is_endian.little)
#ifdef BSWAP4
                ctr = BSWAP4(ctx->Yi.d[3]);
#else
                ctr = GETU32(ctx->Yi.c+12);
#endif
        else
                ctr = ctx->Yi.d[3];

        /* Drain keystream left over from a previous partial block;
         * the ciphertext byte is hashed, then decrypted. */
        n = ctx->mres;
        if (n) {
                while (n && len) {
                        u8 c = *(in++);
                        *(out++) = c^ctx->EKi.c[n];
                        ctx->Xi.c[n] ^= c;
                        --len;
                        n = (n+1)%16;
                }
                if (n==0) GCM_MUL (ctx,Xi);
                else {
                        ctx->mres = n;
                        return 0;
                }
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        /* Fast path: hash the ciphertext chunk, then stream-decrypt it. */
        while (len>=GHASH_CHUNK) {
                GHASH(ctx,in,GHASH_CHUNK);
                (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
                ctr += GHASH_CHUNK/16;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                out += GHASH_CHUNK;
                in  += GHASH_CHUNK;
                len -= GHASH_CHUNK;
        }
#endif
        /* Remaining whole blocks: hash first, then decrypt. */
        if ((i = (len&(size_t)-16))) {
                size_t j=i/16;

#if defined(GHASH)
                GHASH(ctx,in,i);
#else
                while (j--) {
                        size_t k;
                        for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
                        GCM_MUL(ctx,Xi);
                        in += 16;
                }
                /* Rewind: the hashing loop above consumed j and advanced in. */
                j   = i/16;
                in -= i;
#endif
                (*stream)(in,out,j,key,ctx->Yi.c);
                ctr += (unsigned int)j;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                out += i;
                in  += i;
                len -= i;
        }
        /* Trailing partial block via the raw block cipher; residue in mres. */
        if (len) {
                (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                while (len--) {
                        u8 c = in[n];
                        ctx->Xi.c[n] ^= c;
                        out[n] = c^ctx->EKi.c[n];
                        ++n;
                }
        }

        ctx->mres = n;
        return 0;
}
1528
1529 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1530                         size_t len)
1531 {
1532         const union { long one; char little; } is_endian = {1};
1533         u64 alen = ctx->len.u[0]<<3;
1534         u64 clen = ctx->len.u[1]<<3;
1535 #ifdef GCM_FUNCREF_4BIT
1536         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
1537 #endif
1538
1539         if (ctx->mres || ctx->ares)
1540                 GCM_MUL(ctx,Xi);
1541
1542         if (is_endian.little) {
1543 #ifdef BSWAP8
1544                 alen = BSWAP8(alen);
1545                 clen = BSWAP8(clen);
1546 #else
1547                 u8 *p = ctx->len.c;
1548
1549                 ctx->len.u[0] = alen;
1550                 ctx->len.u[1] = clen;
1551
1552                 alen = (u64)GETU32(p)  <<32|GETU32(p+4);
1553                 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1554 #endif
1555         }
1556
1557         ctx->Xi.u[0] ^= alen;
1558         ctx->Xi.u[1] ^= clen;
1559         GCM_MUL(ctx,Xi);
1560
1561         ctx->Xi.u[0] ^= ctx->EK0.u[0];
1562         ctx->Xi.u[1] ^= ctx->EK0.u[1];
1563
1564         if (tag && len<=sizeof(ctx->Xi))
1565                 return memcmp(ctx->Xi.c,tag,len);
1566         else
1567                 return -1;
1568 }
1569
1570 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1571 {
1572         CRYPTO_gcm128_finish(ctx, NULL, 0);
1573         memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
1574 }
1575
1576 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1577 {
1578         GCM128_CONTEXT *ret;
1579
1580         if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1581                 CRYPTO_gcm128_init(ret,key,block);
1582
1583         return ret;
1584 }
1585
1586 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1587 {
1588         if (ctx) {
1589                 OPENSSL_cleanse(ctx,sizeof(*ctx));
1590                 OPENSSL_free(ctx);
1591         }
1592 }
1593
1594 #if defined(SELFTEST)
1595 #include <stdio.h>
1596 #include <openssl/aes.h>
1597
/*
 * GCM validation vectors (the well-known test cases 1-20 from the
 * GCM submission by McGrew & Viega; also reproduced in NIST SP 800-38D
 * validation material).  Naming convention per test case <n>:
 *   K<n>  = key            P<n>  = plaintext
 *   A<n>  = additional authenticated data (AAD)
 *   IV<n> = nonce          C<n>  = expected ciphertext
 *   T<n>  = expected 16-byte authentication tag
 * NULL pointers denote absent (empty) plaintext/AAD/ciphertext, and
 * arrays declared without initializers (e.g. K1[16]) are all-zero.
 * Test cases 1-6 use a 16-byte (AES-128) key, 7-12 a 24-byte
 * (AES-192) key, 13-18 a 32-byte (AES-256) key.
 */
/* Test Case 1 */
static const u8 K1[16],
                *P1=NULL,
                *A1=NULL,
                IV1[12],
                *C1=NULL,
                T1[]=  {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};

/* Test Case 2 */
#define K2 K1
#define A2 A1
#define IV2 IV1
static const u8 P2[16],
                C2[]=  {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
                T2[]=  {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};

/* Test Case 3 */
#define A3 A2
static const u8 K3[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
                P3[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
                IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
                C3[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
                        0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
                        0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
                        0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
                T3[]=  {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};

/* Test Case 4: 60-byte (non-block-multiple) payload plus 20-byte AAD */
#define K4 K3
#define IV4 IV3
static const u8 P4[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
                A4[]=  {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
                        0xab,0xad,0xda,0xd2},
                C4[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
                        0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
                        0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
                        0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
                T4[]=  {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};

/* Test Case 5: short (8-byte) IV */
#define K5 K4
#define P5 P4
#define A5 A4
static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
                C5[]=  {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
                        0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
                        0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
                        0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
                T5[]=  {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};

/* Test Case 6: long (60-byte) IV */
#define K6 K5
#define P6 P5
#define A6 A5
static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
                        0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
                        0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
                        0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
                C6[]=  {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
                        0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
                        0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
                        0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
                T6[]=  {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};

/* Test Case 7: AES-192 (24-byte key) group begins */
static const u8 K7[24],
                *P7=NULL,
                *A7=NULL,
                IV7[12],
                *C7=NULL,
                T7[]=  {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};

/* Test Case 8 */
#define K8 K7
#define IV8 IV7
#define A8 A7
static const u8 P8[16],
                C8[]=  {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
                T8[]=  {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};

/* Test Case 9 */
#define A9 A8
static const u8 K9[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
                        0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
                P9[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
                IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
                C9[]=  {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
                        0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
                        0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
                        0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
                T9[]=  {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};

/* Test Case 10 */
#define K10 K9
#define IV10 IV9
static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
                A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
                        0xab,0xad,0xda,0xd2},
                C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
                        0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
                        0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
                        0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
                T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};

/* Test Case 11: short (8-byte) IV */
#define K11 K10
#define P11 P10
#define A11 A10
static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
                C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
                        0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
                        0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
                        0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
                T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};

/* Test Case 12: long (60-byte) IV */
#define K12 K11
#define P12 P11
#define A12 A11
static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
                        0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
                        0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
                        0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
                C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
                        0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
                        0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
                        0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
                T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};

/* Test Case 13: AES-256 (32-byte key) group begins */
static const u8 K13[32],
                *P13=NULL,
                *A13=NULL,
                IV13[12],
                *C13=NULL,
                T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};

/* Test Case 14 */
#define K14 K13
#define A14 A13
static const u8 P14[16],
                IV14[12],
                C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
                T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};

/* Test Case 15 */
#define A15 A14
static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
                        0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
                P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
                IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
                C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
                        0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
                        0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
                        0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
                T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};

/* Test Case 16 */
#define K16 K15
#define IV16 IV15
static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
                A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
                        0xab,0xad,0xda,0xd2},
                C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
                        0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
                        0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
                        0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
                T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};

/* Test Case 17: short (8-byte) IV */
#define K17 K16
#define P17 P16
#define A17 A16
static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
                C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
                        0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
                        0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
                        0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
                T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};

/* Test Case 18: long (60-byte) IV */
#define K18 K17
#define P18 P17
#define A18 A17
static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
                        0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
                        0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
                        0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
                C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
                        0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
                        0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
                        0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
                T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};

/* Test Case 19: empty payload with a long (128-byte) AAD */
#define K19 K1
#define P19 P1
#define IV19 IV1
#define C19 C1
static const u8 A19[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
                        0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
                        0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
                        0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
                        0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
                T19[]= {0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92};

/* Test Case 20: 64-byte IV and 288-byte all-zero plaintext */
#define K20 K1
#define A20 A1
static const u8 IV20[64]={0xff,0xff,0xff,0xff}, /* this results in 0xff in counter LSB */
                P20[288],
                C20[]= {0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
                        0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
                        0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
                        0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
                        0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
                        0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
                        0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
                        0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
                        0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
                        0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
                        0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
                        0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
                        0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
                        0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
                        0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
                        0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
                        0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
                        0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c},
                T20[]= {0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f};
/*
 * Run GCM test case number `n' against the vectors above: one encrypt
 * pass and one decrypt pass.  Each pass re-keys and re-IVs the context,
 * feeds the AAD (if any), processes the payload (if any), then verifies
 * the 16-byte tag with CRYPTO_gcm128_finish() (non-zero return means
 * tag mismatch) and compares the produced output against the expected
 * C##n (encrypt) or P##n (decrypt).  Failures increment `ret' and print
 * which case failed.  For empty-payload cases P##n/C##n are NULL
 * pointers, so the size/compare expressions degenerate harmlessly and
 * only the tag is checked.  Requires `ctx', `key' and `ret' to exist in
 * the enclosing scope.
 */
#define TEST_CASE(n)    do {                                    \
        u8 out[sizeof(P##n)];                                   \
        AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);          \
        CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);  \
        CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));          \
        memset(out,0,sizeof(out));                              \
        if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));    \
        if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));     \
        if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||               \
            (C##n && memcmp(out,C##n,sizeof(out))))             \
                ret++, printf ("encrypt test#%d failed.\n",n);  \
        CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));          \
        memset(out,0,sizeof(out));                              \
        if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));    \
        if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));     \
        if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||               \
            (P##n && memcmp(out,P##n,sizeof(out))))             \
                ret++, printf ("decrypt test#%d failed.\n",n);  \
        } while(0)
1869
1870 int main()
1871 {
1872         GCM128_CONTEXT ctx;
1873         AES_KEY key;
1874         int ret=0;
1875
1876         TEST_CASE(1);
1877         TEST_CASE(2);
1878         TEST_CASE(3);
1879         TEST_CASE(4);
1880         TEST_CASE(5);
1881         TEST_CASE(6);
1882         TEST_CASE(7);
1883         TEST_CASE(8);
1884         TEST_CASE(9);
1885         TEST_CASE(10);
1886         TEST_CASE(11);
1887         TEST_CASE(12);
1888         TEST_CASE(13);
1889         TEST_CASE(14);
1890         TEST_CASE(15);
1891         TEST_CASE(16);
1892         TEST_CASE(17);
1893         TEST_CASE(18);
1894         TEST_CASE(19);
1895         TEST_CASE(20);
1896
1897 #ifdef OPENSSL_CPUID_OBJ
1898         {
1899         size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1900         union { u64 u; u8 c[1024]; } buf;
1901         int i;
1902
1903         AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1904         CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1905         CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1906
1907         CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1908         start = OPENSSL_rdtsc();
1909         CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1910         gcm_t = OPENSSL_rdtsc() - start;
1911
1912         CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1913                         &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1914                         (block128_f)AES_encrypt);
1915         start = OPENSSL_rdtsc();
1916         CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1917                         &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1918                         (block128_f)AES_encrypt);
1919         ctr_t = OPENSSL_rdtsc() - start;
1920
1921         printf("%.2f-%.2f=%.2f\n",
1922                         gcm_t/(double)sizeof(buf),
1923                         ctr_t/(double)sizeof(buf),
1924                         (gcm_t-ctr_t)/(double)sizeof(buf));
1925 #ifdef GHASH
1926         {
1927         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1928                                 const u8 *inp,size_t len)       = ctx.ghash;
1929
1930         GHASH((&ctx),buf.c,sizeof(buf));
1931         start = OPENSSL_rdtsc();
1932         for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
1933         gcm_t = OPENSSL_rdtsc() - start;
1934         printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
1935         }
1936 #endif
1937         }
1938 #endif
1939
1940         return ret;
1941 }
1942 #endif