crypto/evp/e_aes_cbc_hmac_sha[1|256].c: fix compiler warnings.
[openssl.git] / crypto / evp / e_aes_cbc_hmac_sha256.c
1 /* ====================================================================
2  * Copyright (c) 2011-2013 The OpenSSL Project.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  *
16  * 3. All advertising materials mentioning features or use of this
17  *    software must display the following acknowledgment:
18  *    "This product includes software developed by the OpenSSL Project
19  *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
20  *
21  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22  *    endorse or promote products derived from this software without
23  *    prior written permission. For written permission, please contact
24  *    licensing@OpenSSL.org.
25  *
26  * 5. Products derived from this software may not be called "OpenSSL"
27  *    nor may "OpenSSL" appear in their names without prior written
28  *    permission of the OpenSSL Project.
29  *
30  * 6. Redistributions of any form whatsoever must retain the following
31  *    acknowledgment:
32  *    "This product includes software developed by the OpenSSL Project
33  *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
34  *
35  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
39  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46  * OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  */
49
50 #include <openssl/opensslconf.h>
51
52 #include <stdio.h>
53 #include <string.h>
54
55 #if !defined(OPENSSL_NO_AES) && !defined(OPENSSL_NO_SHA256)
56
57 #include <openssl/evp.h>
58 #include <openssl/objects.h>
59 #include <openssl/aes.h>
60 #include <openssl/sha.h>
61 #include <openssl/rand.h>
62 #include "modes_lcl.h"
63
/* Fallback values for the AEAD flag and control codes, used only when the
 * headers included above do not already define EVP_CIPH_FLAG_AEAD_CIPHER. */
64 #ifndef EVP_CIPH_FLAG_AEAD_CIPHER
65 #define EVP_CIPH_FLAG_AEAD_CIPHER       0x200000
66 #define EVP_CTRL_AEAD_TLS1_AAD          0x16
67 #define EVP_CTRL_AEAD_SET_MAC_KEY       0x17
68 #endif
69
/* If the headers do not know this flag it is defined as 0. */
70 #if !defined(EVP_CIPH_FLAG_DEFAULT_ASN1)
71 #define EVP_CIPH_FLAG_DEFAULT_ASN1 0
72 #endif
73
/* Defined as 0 when absent; the multi-block code further down is compiled
 * only when this macro is non-zero (and OPENSSL_NO_MULTIBLOCK is unset). */
74 #if !defined(EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK)
75 #define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0
76 #endif
77
/* Protocol version threshold: versions >= this value are treated as carrying
 * a 16-byte explicit IV by the cipher and ctrl routines in this file. */
78 #define TLS1_1_VERSION 0x0302
79
80 typedef struct
81     {
82     AES_KEY             ks;
83     SHA256_CTX          head,tail,md;
84     size_t              payload_length; /* AAD length in decrypt case */
85     union {
86         unsigned int    tls_ver;
87         unsigned char   tls_aad[16];    /* 13 used */
88     } aux;
89     } EVP_AES_HMAC_SHA256;
90
91 #define NO_PAYLOAD_LENGTH       ((size_t)-1)
92
93 #if     defined(AES_ASM) &&     ( \
94         defined(__x86_64)       || defined(__x86_64__)  || \
95         defined(_M_AMD64)       || defined(_M_X64)      || \
96         defined(__INTEL__)      )
97
/* Capability words declared here and defined outside this file; this file
 * tests word [1] for the AES-NI bit below and for bit (60-32) before using
 * the stitched AES+SHA256 routine. */
98 extern unsigned int OPENSSL_ia32cap_P[3];
/* Bit 25 (57-32); NOTE(review): presumably checked against
 * OPENSSL_ia32cap_P[1] where the cipher is registered - not visible here. */
99 #define AESNI_CAPABLE   (1<<(57-32))
100
/* Key-schedule setup routines, declared here and defined elsewhere
 * (presumably assembly, given the AES_ASM guard - confirm).  The init_key
 * callback treats a negative return value as failure. */
101 int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
102                               AES_KEY *key);
103 int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
104                               AES_KEY *key);
105
/* CBC en/decryption of |length| bytes; |enc| is passed as 1 on the encrypt
 * paths and 0 on the decrypt path of this file. */
106 void aesni_cbc_encrypt(const unsigned char *in,
107                            unsigned char *out,
108                            size_t length,
109                            const AES_KEY *key,
110                            unsigned char *ivec, int enc);
111
/* Combined CBC-encrypt + SHA256 routine.  The caller in this file passes a
 * count of SHA256_CBLOCK-sized blocks as |blocks| and a separate pointer
 * |in0| to the data to be hashed; its return value is ignored there. */
112 int aesni_cbc_sha256_enc (const void *inp, void *out, size_t blocks,
113                 const AES_KEY *key, unsigned char iv[16],
114                 SHA256_CTX *ctx,const void *in0);
115
/* Typed accessor for the per-context state stored in ctx->cipher_data. */
116 #define data(ctx) ((EVP_AES_HMAC_SHA256 *)(ctx)->cipher_data)
117
118 static int aesni_cbc_hmac_sha256_init_key(EVP_CIPHER_CTX *ctx,
119                         const unsigned char *inkey,
120                         const unsigned char *iv, int enc)
121         {
122         EVP_AES_HMAC_SHA256 *key = data(ctx);
123         int ret;
124
125         if (enc)
126                 memset(&key->ks,0,sizeof(key->ks.rd_key)),
127                 ret=aesni_set_encrypt_key(inkey,ctx->key_len*8,&key->ks);
128         else
129                 ret=aesni_set_decrypt_key(inkey,ctx->key_len*8,&key->ks);
130
131         SHA256_Init(&key->head);        /* handy when benchmarking */
132         key->tail = key->head;
133         key->md   = key->head;
134
135         key->payload_length = NO_PAYLOAD_LENGTH;
136
137         return ret<0?0:1;
138         }
139
/* Enables the code paths in the cipher routine that use the combined
 * AES-CBC + SHA256 routine (guarded there by #if defined(STITCHED_CALL)). */
140 #define STITCHED_CALL
141
/* Without the stitched path no bytes are ever pre-encrypted, so aes_off is
 * a constant 0 instead of a local variable. */
142 #if !defined(STITCHED_CALL)
143 #define aes_off 0
144 #endif
145
/* Raw SHA256 compression routine, declared here and defined elsewhere.
 * Every caller in this file passes a number of 64-byte blocks as the third
 * argument, despite the parameter being named |len|. */
146 void sha256_block_data_order (void *c,const void *p,size_t len);
147
148 static void sha256_update(SHA256_CTX *c,const void *data,size_t len)
149 {       const unsigned char *ptr = data;
150         size_t res;
151
152         if ((res = c->num)) {
153                 res = SHA256_CBLOCK-res;
154                 if (len<res) res=len;
155                 SHA256_Update (c,ptr,res);
156                 ptr += res;
157                 len -= res;
158         }
159
160         res = len % SHA256_CBLOCK;
161         len -= res;
162
163         if (len) {
164                 sha256_block_data_order(c,ptr,len/SHA256_CBLOCK);
165
166                 ptr += len;
167                 c->Nh += len>>29;
168                 c->Nl += len<<=3;
169                 if (c->Nl<(unsigned int)len) c->Nh++;
170         }
171
172         if (res)
173                 SHA256_Update(c,ptr,res);
174 }
175
/* From this point on every SHA256_Update() call in this file resolves to
 * the local sha256_update() defined above. */
176 #ifdef SHA256_Update
177 #undef SHA256_Update
178 #endif
179 #define SHA256_Update sha256_update
180
181 #if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
182
/* Hash state for up to 8 parallel lanes: tls1_1_multi_block_encrypt() loads
 * A[i]..H[i] from the eight state words h[0]..h[7] of lane i. */
183 typedef struct { unsigned int A[8],B[8],C[8],D[8],E[8],F[8],G[8],H[8]; } SHA256_MB_CTX;
/* One lane of hash input: start pointer and number of 64-byte blocks. */
184 typedef struct { const unsigned char *ptr; int blocks;  } HASH_DESC;
185
/* Multi-lane SHA256, declared here and defined elsewhere; the last argument
 * is passed as n4x (1 or 2) by the caller in this file. */
186 void sha256_multi_block(SHA256_MB_CTX *,const HASH_DESC *,int);
187
/* One lane of cipher work: input, output, number of 16-byte blocks and the
 * 16-byte IV for that lane. */
188 typedef struct { const unsigned char *inp; unsigned char *out;
189                  int blocks; u64 iv[2]; } CIPH_DESC; 
190
/* Multi-lane AES-CBC encryption, declared here and defined elsewhere; called
 * in this file with the AES key schedule and n4x. */
191 void aesni_multi_cbc_encrypt(CIPH_DESC *,void *,int);
192
/*
 * Encrypt |inp_len| bytes from |inp| as x4 = 4*n4x separate records written
 * back to back into |out|.
 *
 * The input is cut into x4 fragments (the first x4-1 of |frag| bytes, the
 * final one of |last| bytes).  For each fragment the code below
 *   - builds a 13-byte MAC header (8-byte sequence number taken from
 *     key->md.data and advanced by the fragment index, three bytes copied
 *     from key->md.data[8..10], 2-byte fragment length),
 *   - computes HMAC-SHA256 over header and fragment, starting the inner hash
 *     from the key->md state and the outer hash from the key->tail state,
 *   - appends the 32-byte MAC and padding bytes, and
 *   - CBC-encrypts the result under a fresh random IV.
 * Each output record is laid out as 5-byte header | 16-byte IV | ciphertext.
 *
 * Hashing and encryption are delegated to sha256_multi_block() and
 * aesni_multi_cbc_encrypt(), which are declared but not defined here.
 *
 * Returns the total number of bytes written to |out|, or 0 if RAND_bytes()
 * fails.
 * NOTE(review): no checks on |inp_len| or on the capacity of |out| are
 * visible here (each fragment must hold at least 64-13 bytes for the first
 * memcpy below); callers presumably guarantee this - confirm.
 */
193 static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key,
194         unsigned char *out, const unsigned char *inp, size_t inp_len,
195         int n4x)        /* n4x is 1 or 2 */
196 {
197         HASH_DESC       hash_d[8], edges[8];
198         CIPH_DESC       ciph_d[8];
199         unsigned char   storage[sizeof(SHA256_MB_CTX)+32];
200         union { u64     q[16];
201                 u32     d[32];
202                 u8      c[128]; } blocks[8];
203         SHA256_MB_CTX   *ctx;
204         unsigned int    frag, last, packlen, i, x4=4*n4x, minblocks, processed=0;
205         size_t          ret = 0;
206         u8              *IVs;
207 #if defined(BSWAP8)
208         u64             seqnum;
209 #endif
210
        /* blocks[] doubles as scratch space for the random IVs until the
         * descriptors below have copied them out */
211         if (RAND_bytes((IVs=blocks[0].c),16*x4)<=0)     /* ask for IVs in bulk */
212                 return 0;
213
214         ctx = (SHA256_MB_CTX *)(storage+32-((size_t)storage%32));       /* align */
215
        /* frag = inp_len/x4 (rounded down); last = what remains for the
         * final fragment after x4-1 fragments of frag bytes */
216         frag = (unsigned int)inp_len>>(1+n4x);
217         last = (unsigned int)inp_len+frag-(frag<<(1+n4x));
        /* move one byte from the last fragment to each of the others;
         * NOTE(review): presumably this saves a hash block for the last
         * lane - confirm */
218         if (last>frag && ((last+13+9)%64)<(x4-1)) {
219                 frag++;
220                 last -= x4-1;
221         }
222
        /* distance between consecutive output records: 5-byte header,
         * 16-byte IV, then fragment plus 32-byte MAC plus padding rounded
         * to a multiple of 16 */
223         packlen = 5+16+((frag+32+16)&-16);
224
225         /* populate descriptors with pointers and IVs */
226         hash_d[0].ptr = inp;
227         ciph_d[0].inp = inp;
228         ciph_d[0].out = out+5+16;       /* 5+16 is place for header and explicit IV */
229         memcpy(ciph_d[0].out-16,IVs,16);
230         memcpy(ciph_d[0].iv,IVs,16);    IVs += 16;
231
232         for (i=1;i<x4;i++) {
233                 ciph_d[i].inp = hash_d[i].ptr = hash_d[i-1].ptr+frag;
234                 ciph_d[i].out = ciph_d[i-1].out+packlen;
235                 memcpy(ciph_d[i].out-16,IVs,16);
236                 memcpy(ciph_d[i].iv,IVs,16);    IVs+=16;
237         }
238
        /* the first 8 bytes buffered in key->md.data are read as a
         * big-endian sequence number */
239 #if defined(BSWAP8)
240         memcpy(blocks[0].c,key->md.data,8);
241         seqnum = BSWAP8(blocks[0].q[0]);
242 #endif
243         for (i=0;i<x4;i++) {
244                 unsigned int len = (i==(x4-1)?last:frag);
245 #if !defined(BSWAP8)
246                 unsigned int carry, j;
247 #endif
248
                /* every lane starts from the same hash state key->md.h[] */
249                 ctx->A[i] = key->md.h[0];
250                 ctx->B[i] = key->md.h[1];
251                 ctx->C[i] = key->md.h[2];
252                 ctx->D[i] = key->md.h[3];
253                 ctx->E[i] = key->md.h[4];
254                 ctx->F[i] = key->md.h[5];
255                 ctx->G[i] = key->md.h[6];
256                 ctx->H[i] = key->md.h[7];
257
258                 /* fix seqnum */
259 #if defined(BSWAP8)
260                 blocks[i].q[0] = BSWAP8(seqnum+i);
261 #else
262                 for (carry=i,j=8;j--;) {
263                         blocks[i].c[j] = ((u8*)key->md.data)[j]+carry;
264                         carry = (blocks[i].c[j]-carry)>>(sizeof(carry)*8-1);
265                 }
266 #endif
267                 blocks[i].c[8] = ((u8*)key->md.data)[8];
268                 blocks[i].c[9] = ((u8*)key->md.data)[9];
269                 blocks[i].c[10] = ((u8*)key->md.data)[10];
270                 /* fix length */
271                 blocks[i].c[11] = (u8)(len>>8);
272                 blocks[i].c[12] = (u8)(len);
273
                /* first hash block = 13-byte header + first 64-13 bytes of
                 * the fragment; the remaining whole blocks are hashed
                 * straight from the input */
274                 memcpy(blocks[i].c+13,hash_d[i].ptr,64-13);
275                 hash_d[i].ptr += 64-13;
276                 hash_d[i].blocks = (len-(64-13))/64;
277
278                 edges[i].ptr = blocks[i].c;
279                 edges[i].blocks = 1;
280         }
281
282         /* hash 13-byte headers and first 64-13 bytes of inputs */
283         sha256_multi_block(ctx,edges,n4x);
284         /* hash bulk inputs */
285 #define MAXCHUNKSIZE    2048
286 #if     MAXCHUNKSIZE%64
287 #error  "MAXCHUNKSIZE is not divisible by 64"
288 #elif   MAXCHUNKSIZE
289         /* goal is to minimize pressure on L1 cache by moving
290          * in shorter steps, so that hashed data is still in
291          * the cache by the time we encrypt it */
292         minblocks = ((frag<=last ? frag : last)-(64-13))/64;
293         if (minblocks>MAXCHUNKSIZE/64) {
294                 for (i=0;i<x4;i++) {
295                         edges[i].ptr     = hash_d[i].ptr;
296                         edges[i].blocks  = MAXCHUNKSIZE/64;
297                         ciph_d[i].blocks = MAXCHUNKSIZE/16;
298                 }
299                 do {
300                         sha256_multi_block(ctx,edges,n4x);
301                         aesni_multi_cbc_encrypt(ciph_d,&key->ks,n4x);
302
                        /* advance every lane by one chunk; the IV for the
                         * next chunk is the last ciphertext block written */
303                         for (i=0;i<x4;i++) {
304                                 edges[i].ptr     = hash_d[i].ptr += MAXCHUNKSIZE;
305                                 hash_d[i].blocks -= MAXCHUNKSIZE/64;
306                                 edges[i].blocks  = MAXCHUNKSIZE/64;
307                                 ciph_d[i].inp    += MAXCHUNKSIZE;
308                                 ciph_d[i].out    += MAXCHUNKSIZE;
309                                 ciph_d[i].blocks = MAXCHUNKSIZE/16;
310                                 memcpy(ciph_d[i].iv,ciph_d[i].out-16,16);
311                         }
312                         processed += MAXCHUNKSIZE;
313                         minblocks -= MAXCHUNKSIZE/64;
314                 } while (minblocks>MAXCHUNKSIZE/64);
315         }
316 #endif
317 #undef  MAXCHUNKSIZE
        /* hash whatever whole blocks are still described by hash_d[] */
318         sha256_multi_block(ctx,hash_d,n4x);
319
320         memset(blocks,0,sizeof(blocks));
321         for (i=0;i<x4;i++) {
322                 unsigned int            len = (i==(x4-1)?last:frag),
323                                         off = hash_d[i].blocks*64;
324                 const unsigned char    *ptr = hash_d[i].ptr+off;
325
326                 off = (len-processed)-(64-13)-off;      /* remainder actually */
327                 memcpy(blocks[i].c,ptr,off);
                /* SHA256 padding: 0x80 marker, then the total bit length in
                 * the last word of the first or second block depending on
                 * how much room the remainder leaves */
328                 blocks[i].c[off]=0x80;
329                 len += 64+13;           /* 64 is HMAC header */
330                 len *= 8;               /* convert to bits */
331                 if (off<(64-8)) {
332 #ifdef BSWAP4
333                         blocks[i].d[15] = BSWAP4(len);
334 #else
335                         PUTU32(blocks[i].c+60,len);
336 #endif
337                         edges[i].blocks = 1;                    
338                 } else {
339 #ifdef BSWAP4
340                         blocks[i].d[31] = BSWAP4(len);
341 #else
342                         PUTU32(blocks[i].c+124,len);
343 #endif
344                         edges[i].blocks = 2;
345                 }
346                 edges[i].ptr = blocks[i].c;
347         }
348
349         /* hash input tails and finalize */
350         sha256_multi_block(ctx,edges,n4x);
351
        /* outer HMAC pass: each lane's 32-byte inner digest becomes the
         * message (padded with 0x80 and the bit length (64+32)*8), while the
         * lane state is reloaded from key->tail */
352         memset(blocks,0,sizeof(blocks));
353         for (i=0;i<x4;i++) {
354 #ifdef BSWAP4
355                 blocks[i].d[0] = BSWAP4(ctx->A[i]);     ctx->A[i] = key->tail.h[0];
356                 blocks[i].d[1] = BSWAP4(ctx->B[i]);     ctx->B[i] = key->tail.h[1];
357                 blocks[i].d[2] = BSWAP4(ctx->C[i]);     ctx->C[i] = key->tail.h[2];
358                 blocks[i].d[3] = BSWAP4(ctx->D[i]);     ctx->D[i] = key->tail.h[3];
359                 blocks[i].d[4] = BSWAP4(ctx->E[i]);     ctx->E[i] = key->tail.h[4];
360                 blocks[i].d[5] = BSWAP4(ctx->F[i]);     ctx->F[i] = key->tail.h[5];
361                 blocks[i].d[6] = BSWAP4(ctx->G[i]);     ctx->G[i] = key->tail.h[6];
362                 blocks[i].d[7] = BSWAP4(ctx->H[i]);     ctx->H[i] = key->tail.h[7];
363                 blocks[i].c[32] = 0x80;
364                 blocks[i].d[15] = BSWAP4((64+32)*8);
365 #else
366                 PUTU32(blocks[i].c+0,ctx->A[i]);        ctx->A[i] = key->tail.h[0];
367                 PUTU32(blocks[i].c+4,ctx->B[i]);        ctx->B[i] = key->tail.h[1];
368                 PUTU32(blocks[i].c+8,ctx->C[i]);        ctx->C[i] = key->tail.h[2];
369                 PUTU32(blocks[i].c+12,ctx->D[i]);       ctx->D[i] = key->tail.h[3];
370                 PUTU32(blocks[i].c+16,ctx->E[i]);       ctx->E[i] = key->tail.h[4];
371                 PUTU32(blocks[i].c+20,ctx->F[i]);       ctx->F[i] = key->tail.h[5];
372                 PUTU32(blocks[i].c+24,ctx->G[i]);       ctx->G[i] = key->tail.h[6];
373                 PUTU32(blocks[i].c+28,ctx->H[i]);       ctx->H[i] = key->tail.h[7];
374                 blocks[i].c[32] = 0x80;
375                 PUTU32(blocks[i].c+60,(64+32)*8);
376 #endif
377                 edges[i].ptr = blocks[i].c;
378                 edges[i].blocks = 1;
379         }
380
381         /* finalize MACs */
382         sha256_multi_block(ctx,edges,n4x);
383
384         for (i=0;i<x4;i++) {
385                 unsigned int len = (i==(x4-1)?last:frag), pad, j;
386                 unsigned char *out0 = out;
387
                /* copy the not yet encrypted part of the fragment to its
                 * place in the output so the final pass encrypts in place */
388                 memcpy(ciph_d[i].out,ciph_d[i].inp,len-processed);
389                 ciph_d[i].inp = ciph_d[i].out;
390
391                 out += 5+16+len;
392
393                 /* write MAC */
394                 PUTU32(out+0,ctx->A[i]);
395                 PUTU32(out+4,ctx->B[i]);
396                 PUTU32(out+8,ctx->C[i]);
397                 PUTU32(out+12,ctx->D[i]);
398                 PUTU32(out+16,ctx->E[i]);
399                 PUTU32(out+20,ctx->F[i]);
400                 PUTU32(out+24,ctx->G[i]);
401                 PUTU32(out+28,ctx->H[i]);
402                 out += 32;
403                 len += 32;
404
405                 /* pad */
                /* pad+1 bytes, each holding the value pad, bring the length
                 * up to a multiple of 16 */
406                 pad = 15-len%16;
407                 for (j=0;j<=pad;j++) *(out++) = pad;
408                 len += pad+1;
409
410                 ciph_d[i].blocks = (len-processed)/16;
411                 len += 16;      /* account for explicit iv */
412
413                 /* arrange header */
414                 out0[0] = ((u8*)key->md.data)[8];
415                 out0[1] = ((u8*)key->md.data)[9];
416                 out0[2] = ((u8*)key->md.data)[10];
417                 out0[3] = (u8)(len>>8);
418                 out0[4] = (u8)(len);
419
420                 ret += len+5;
421                 inp += frag;
422         }
423
        /* encrypt the remaining plaintext, MAC and padding of every lane */
424         aesni_multi_cbc_encrypt(ciph_d,&key->ks,n4x);
425
        /* wipe the scratch blocks and the lane hash state */
426         OPENSSL_cleanse(blocks,sizeof(blocks));
427         OPENSSL_cleanse(ctx,sizeof(*ctx));
428
429         return ret;
430 }
431 #endif
432
433 static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
434                       const unsigned char *in, size_t len)
435         {
436         EVP_AES_HMAC_SHA256 *key = data(ctx);
437         unsigned int l;
438         size_t  plen = key->payload_length,
439                 iv = 0,         /* explicit IV in TLS 1.1 and later */
440                 sha_off = 0;
441 #if defined(STITCHED_CALL)
442         size_t  aes_off = 0,
443                 blocks;
444
445         sha_off = SHA256_CBLOCK-key->md.num;
446 #endif
447
448         key->payload_length = NO_PAYLOAD_LENGTH;
449
450         if (len%AES_BLOCK_SIZE) return 0;
451
452         if (ctx->encrypt) {
453                 if (plen==NO_PAYLOAD_LENGTH)
454                         plen = len;
455                 else if (len!=((plen+SHA256_DIGEST_LENGTH+AES_BLOCK_SIZE)&-AES_BLOCK_SIZE))
456                         return 0;
457                 else if (key->aux.tls_ver >= TLS1_1_VERSION)
458                         iv = AES_BLOCK_SIZE;
459
460 #if defined(STITCHED_CALL)
461                 if (OPENSSL_ia32cap_P[1]&(1<<(60-32)) && /* AVX? */
462                     plen>(sha_off+iv) &&
463                     (blocks=(plen-(sha_off+iv))/SHA256_CBLOCK)) {
464                         SHA256_Update(&key->md,in+iv,sha_off);
465
466                         (void)aesni_cbc_sha256_enc(in,out,blocks,&key->ks,
467                                 ctx->iv,&key->md,in+iv+sha_off);
468                         blocks *= SHA256_CBLOCK;
469                         aes_off += blocks;
470                         sha_off += blocks;
471                         key->md.Nh += blocks>>29;
472                         key->md.Nl += blocks<<=3;
473                         if (key->md.Nl<(unsigned int)blocks) key->md.Nh++;
474                 } else {
475                         sha_off = 0;
476                 }
477 #endif
478                 sha_off += iv;
479                 SHA256_Update(&key->md,in+sha_off,plen-sha_off);
480
481                 if (plen!=len)  {       /* "TLS" mode of operation */
482                         if (in!=out)
483                                 memcpy(out+aes_off,in+aes_off,plen-aes_off);
484
485                         /* calculate HMAC and append it to payload */
486                         SHA256_Final(out+plen,&key->md);
487                         key->md = key->tail;
488                         SHA256_Update(&key->md,out+plen,SHA256_DIGEST_LENGTH);
489                         SHA256_Final(out+plen,&key->md);
490
491                         /* pad the payload|hmac */
492                         plen += SHA256_DIGEST_LENGTH;
493                         for (l=len-plen-1;plen<len;plen++) out[plen]=l;
494                         /* encrypt HMAC|padding at once */
495                         aesni_cbc_encrypt(out+aes_off,out+aes_off,len-aes_off,
496                                         &key->ks,ctx->iv,1);
497                 } else {
498                         aesni_cbc_encrypt(in+aes_off,out+aes_off,len-aes_off,
499                                         &key->ks,ctx->iv,1);
500                 }
501         } else {
502                 union { unsigned int  u[SHA256_DIGEST_LENGTH/sizeof(unsigned int)];
503                         unsigned char c[64+SHA256_DIGEST_LENGTH]; } mac, *pmac;
504
505                 /* arrange cache line alignment */
506                 pmac = (void *)(((size_t)mac.c+63)&((size_t)0-64));
507
508                 /* decrypt HMAC|padding at once */
509                 aesni_cbc_encrypt(in,out,len,
510                                 &key->ks,ctx->iv,0);
511
512                 if (plen != NO_PAYLOAD_LENGTH) {        /* "TLS" mode of operation */
513                         size_t inp_len, mask, j, i;
514                         unsigned int res, maxpad, pad, bitlen;
515                         int ret = 1;
516                         union { unsigned int  u[SHA_LBLOCK];
517                                 unsigned char c[SHA256_CBLOCK]; }
518                                 *data = (void *)key->md.data;
519
520                         if ((key->aux.tls_aad[plen-4]<<8|key->aux.tls_aad[plen-3])
521                             >= TLS1_1_VERSION)
522                                 iv = AES_BLOCK_SIZE;
523
524                         if (len<(iv+SHA256_DIGEST_LENGTH+1))
525                                 return 0;
526
527                         /* omit explicit iv */
528                         out += iv;
529                         len -= iv;
530
531                         /* figure out payload length */
532                         pad = out[len-1];
533                         maxpad = len-(SHA256_DIGEST_LENGTH+1);
534                         maxpad |= (255-maxpad)>>(sizeof(maxpad)*8-8);
535                         maxpad &= 255;
536
537                         inp_len = len - (SHA256_DIGEST_LENGTH+pad+1);
538                         mask = (0-((inp_len-len)>>(sizeof(inp_len)*8-1)));
539                         inp_len &= mask;
540                         ret &= (int)mask;
541
542                         key->aux.tls_aad[plen-2] = inp_len>>8;
543                         key->aux.tls_aad[plen-1] = inp_len;
544
545                         /* calculate HMAC */
546                         key->md = key->head;
547                         SHA256_Update(&key->md,key->aux.tls_aad,plen);
548
549 #if 1
550                         len -= SHA256_DIGEST_LENGTH;            /* amend mac */
551                         if (len>=(256+SHA256_CBLOCK)) {
552                                 j = (len-(256+SHA256_CBLOCK))&(0-SHA256_CBLOCK);
553                                 j += SHA256_CBLOCK-key->md.num;
554                                 SHA256_Update(&key->md,out,j);
555                                 out += j;
556                                 len -= j;
557                                 inp_len -= j;
558                         }
559
560                         /* but pretend as if we hashed padded payload */
561                         bitlen = key->md.Nl+(inp_len<<3);       /* at most 18 bits */
562 #ifdef BSWAP4
563                         bitlen = BSWAP4(bitlen);
564 #else
565                         mac.c[0] = 0;
566                         mac.c[1] = (unsigned char)(bitlen>>16);
567                         mac.c[2] = (unsigned char)(bitlen>>8);
568                         mac.c[3] = (unsigned char)bitlen;
569                         bitlen = mac.u[0];
570 #endif
571
572                         pmac->u[0]=0;
573                         pmac->u[1]=0;
574                         pmac->u[2]=0;
575                         pmac->u[3]=0;
576                         pmac->u[4]=0;
577                         pmac->u[5]=0;
578                         pmac->u[6]=0;
579                         pmac->u[7]=0;
580
581                         for (res=key->md.num, j=0;j<len;j++) {
582                                 size_t c = out[j];
583                                 mask = (j-inp_len)>>(sizeof(j)*8-8);
584                                 c &= mask;
585                                 c |= 0x80&~mask&~((inp_len-j)>>(sizeof(j)*8-8));
586                                 data->c[res++]=(unsigned char)c;
587
588                                 if (res!=SHA256_CBLOCK) continue;
589
590                                 /* j is not incremented yet */
591                                 mask = 0-((inp_len+7-j)>>(sizeof(j)*8-1));
592                                 data->u[SHA_LBLOCK-1] |= bitlen&mask;
593                                 sha256_block_data_order(&key->md,data,1);
594                                 mask &= 0-((j-inp_len-72)>>(sizeof(j)*8-1));
595                                 pmac->u[0] |= key->md.h[0] & mask;
596                                 pmac->u[1] |= key->md.h[1] & mask;
597                                 pmac->u[2] |= key->md.h[2] & mask;
598                                 pmac->u[3] |= key->md.h[3] & mask;
599                                 pmac->u[4] |= key->md.h[4] & mask;
600                                 pmac->u[5] |= key->md.h[5] & mask;
601                                 pmac->u[6] |= key->md.h[6] & mask;
602                                 pmac->u[7] |= key->md.h[7] & mask;
603                                 res=0;
604                         }
605
606                         for(i=res;i<SHA256_CBLOCK;i++,j++) data->c[i]=0;
607
608                         if (res>SHA256_CBLOCK-8) {
609                                 mask = 0-((inp_len+8-j)>>(sizeof(j)*8-1));
610                                 data->u[SHA_LBLOCK-1] |= bitlen&mask;
611                                 sha256_block_data_order(&key->md,data,1);
612                                 mask &= 0-((j-inp_len-73)>>(sizeof(j)*8-1));
613                                 pmac->u[0] |= key->md.h[0] & mask;
614                                 pmac->u[1] |= key->md.h[1] & mask;
615                                 pmac->u[2] |= key->md.h[2] & mask;
616                                 pmac->u[3] |= key->md.h[3] & mask;
617                                 pmac->u[4] |= key->md.h[4] & mask;
618                                 pmac->u[5] |= key->md.h[5] & mask;
619                                 pmac->u[6] |= key->md.h[6] & mask;
620                                 pmac->u[7] |= key->md.h[7] & mask;
621
622                                 memset(data,0,SHA256_CBLOCK);
623                                 j+=64;
624                         }
625                         data->u[SHA_LBLOCK-1] = bitlen;
626                         sha256_block_data_order(&key->md,data,1);
627                         mask = 0-((j-inp_len-73)>>(sizeof(j)*8-1));
628                         pmac->u[0] |= key->md.h[0] & mask;
629                         pmac->u[1] |= key->md.h[1] & mask;
630                         pmac->u[2] |= key->md.h[2] & mask;
631                         pmac->u[3] |= key->md.h[3] & mask;
632                         pmac->u[4] |= key->md.h[4] & mask;
633                         pmac->u[5] |= key->md.h[5] & mask;
634                         pmac->u[6] |= key->md.h[6] & mask;
635                         pmac->u[7] |= key->md.h[7] & mask;
636
637 #ifdef BSWAP4
638                         pmac->u[0] = BSWAP4(pmac->u[0]);
639                         pmac->u[1] = BSWAP4(pmac->u[1]);
640                         pmac->u[2] = BSWAP4(pmac->u[2]);
641                         pmac->u[3] = BSWAP4(pmac->u[3]);
642                         pmac->u[4] = BSWAP4(pmac->u[4]);
643                         pmac->u[5] = BSWAP4(pmac->u[5]);
644                         pmac->u[6] = BSWAP4(pmac->u[6]);
645                         pmac->u[7] = BSWAP4(pmac->u[7]);
646 #else
647                         for (i=0;i<8;i++) {
648                                 res = pmac->u[i];
649                                 pmac->c[4*i+0]=(unsigned char)(res>>24);
650                                 pmac->c[4*i+1]=(unsigned char)(res>>16);
651                                 pmac->c[4*i+2]=(unsigned char)(res>>8);
652                                 pmac->c[4*i+3]=(unsigned char)res;
653                         }
654 #endif
655                         len += SHA256_DIGEST_LENGTH;
656 #else
657                         SHA256_Update(&key->md,out,inp_len);
658                         res = key->md.num;
659                         SHA256_Final(pmac->c,&key->md);
660
661                         {
662                         unsigned int inp_blocks, pad_blocks;
663
664                         /* but pretend as if we hashed padded payload */
665                         inp_blocks = 1+((SHA256_CBLOCK-9-res)>>(sizeof(res)*8-1));
666                         res += (unsigned int)(len-inp_len);
667                         pad_blocks = res / SHA256_CBLOCK;
668                         res %= SHA256_CBLOCK;
669                         pad_blocks += 1+((SHA256_CBLOCK-9-res)>>(sizeof(res)*8-1));
670                         for (;inp_blocks<pad_blocks;inp_blocks++)
671                                 sha1_block_data_order(&key->md,data,1);
672                         }
673 #endif
674                         key->md = key->tail;
675                         SHA256_Update(&key->md,pmac->c,SHA256_DIGEST_LENGTH);
676                         SHA256_Final(pmac->c,&key->md);
677
678                         /* verify HMAC */
679                         out += inp_len;
680                         len -= inp_len;
681 #if 1
682                         {
683                         unsigned char *p = out+len-1-maxpad-SHA256_DIGEST_LENGTH;
684                         size_t off = out-p;
685                         unsigned int c, cmask;
686
687                         maxpad += SHA256_DIGEST_LENGTH;
688                         for (res=0,i=0,j=0;j<maxpad;j++) {
689                                 c = p[j];
690                                 cmask = ((int)(j-off-SHA256_DIGEST_LENGTH))>>(sizeof(int)*8-1);
691                                 res |= (c^pad)&~cmask;  /* ... and padding */
692                                 cmask &= ((int)(off-1-j))>>(sizeof(int)*8-1);
693                                 res |= (c^pmac->c[i])&cmask;
694                                 i += 1&cmask;
695                         }
696                         maxpad -= SHA256_DIGEST_LENGTH;
697
698                         res = 0-((0-res)>>(sizeof(res)*8-1));
699                         ret &= (int)~res;
700                         }
701 #else
702                         for (res=0,i=0;i<SHA256_DIGEST_LENGTH;i++)
703                                 res |= out[i]^pmac->c[i];
704                         res = 0-((0-res)>>(sizeof(res)*8-1));
705                         ret &= (int)~res;
706
707                         /* verify padding */
708                         pad = (pad&~res) | (maxpad&res);
709                         out = out+len-1-pad;
710                         for (res=0,i=0;i<pad;i++)
711                                 res |= out[i]^pad;
712
713                         res = (0-res)>>(sizeof(res)*8-1);
714                         ret &= (int)~res;
715 #endif
716                         return ret;
717                 } else {
718                         SHA256_Update(&key->md,out,len);
719                 }
720         }
721
722         return 1;
723         }
724
725 static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void *ptr)
726         {
727         EVP_AES_HMAC_SHA256 *key = data(ctx);
728         unsigned int u_arg = (unsigned int)arg;
729
730         switch (type)
731                 {
732         case EVP_CTRL_AEAD_SET_MAC_KEY:
733                 {
734                 unsigned int  i;
735                 unsigned char hmac_key[64];
736
737                 memset (hmac_key,0,sizeof(hmac_key));
738
739                 if (arg < 0)
740                         return -1;
741
742                 if (u_arg > sizeof(hmac_key)) {
743                         SHA256_Init(&key->head);
744                         SHA256_Update(&key->head,ptr,arg);
745                         SHA256_Final(hmac_key,&key->head);
746                 } else {
747                         memcpy(hmac_key,ptr,arg);
748                 }
749
750                 for (i=0;i<sizeof(hmac_key);i++)
751                         hmac_key[i] ^= 0x36;            /* ipad */
752                 SHA256_Init(&key->head);
753                 SHA256_Update(&key->head,hmac_key,sizeof(hmac_key));
754
755                 for (i=0;i<sizeof(hmac_key);i++)
756                         hmac_key[i] ^= 0x36^0x5c;       /* opad */
757                 SHA256_Init(&key->tail);
758                 SHA256_Update(&key->tail,hmac_key,sizeof(hmac_key));
759
760                 OPENSSL_cleanse(hmac_key,sizeof(hmac_key));
761
762                 return 1;
763                 }
764         case EVP_CTRL_AEAD_TLS1_AAD:
765                 {
766                 unsigned char *p=ptr;
767                 unsigned int   len=p[arg-2]<<8|p[arg-1];
768
769                 if (ctx->encrypt)
770                         {
771                         key->payload_length = len;
772                         if ((key->aux.tls_ver=p[arg-4]<<8|p[arg-3]) >= TLS1_1_VERSION) {
773                                 len -= AES_BLOCK_SIZE;
774                                 p[arg-2] = len>>8;
775                                 p[arg-1] = len;
776                         }
777                         key->md = key->head;
778                         SHA256_Update(&key->md,p,arg);
779
780                         return (int)(((len+SHA256_DIGEST_LENGTH+AES_BLOCK_SIZE)&-AES_BLOCK_SIZE)
781                                 - len);
782                         }
783                 else
784                         {
785                         if (arg>13) arg = 13;
786                         memcpy(key->aux.tls_aad,ptr,arg);
787                         key->payload_length = arg;
788
789                         return SHA256_DIGEST_LENGTH;
790                         }
791                 }
792 #if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
793         case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE:
794                 return (int)(5+16+((arg+32+16)&-16));
795         case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD:
796                 {
797                 EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
798                         (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *)ptr;
799                 unsigned int n4x=1, x4;
800                 unsigned int frag, last, packlen, inp_len;
801
802                 if (arg < 0)
803                         return -1;
804
805                 if (u_arg < sizeof(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM)) return -1;
806
807                 inp_len = param->inp[11]<<8|param->inp[12];
808
809                 if (ctx->encrypt)
810                         {
811                         if ((param->inp[9]<<8|param->inp[10]) < TLS1_1_VERSION)
812                                 return -1;
813
814                         if (inp_len)
815                                 {
816                                 if (inp_len<4096) return 0;     /* too short */
817
818                                 if (inp_len>=8192 && OPENSSL_ia32cap_P[2]&(1<<5))
819                                         n4x=2;  /* AVX2 */
820                                 }
821                         else if ((n4x=param->interleave/4) && n4x<=2)
822                                 inp_len = param->len;
823                         else
824                                 return -1;
825
826                         key->md = key->head;
827                         SHA256_Update(&key->md,param->inp,13);
828
829                         x4 = 4*n4x; n4x += 1;
830
831                         frag = inp_len>>n4x;
832                         last = inp_len+frag-(frag<<n4x);
833                         if (last>frag && ((last+13+9)%64<(x4-1))) {
834                                 frag++;
835                                 last -= x4-1;
836                         }
837
838                         packlen = 5+16+((frag+32+16)&-16);
839                         packlen = (packlen<<n4x)-packlen;
840                         packlen += 5+16+((last+32+16)&-16);
841
842                         param->interleave = x4;
843
844                         return (int)packlen;
845                         }
846                 else
847                         return -1;      /* not yet */
848                 }
849         case EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT:
850                 {
851                 EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
852                         (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *)ptr;
853
854                 return (int)tls1_1_multi_block_encrypt(key,param->out,param->inp,
855                                                 param->len,param->interleave/4);
856                 }
857         case EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT:
858 #endif
859         default:
860                 return -1;
861                 }
862         }
863
864 static EVP_CIPHER aesni_128_cbc_hmac_sha256_cipher =
865         {
866 #ifdef NID_aes_128_cbc_hmac_sha256
867         NID_aes_128_cbc_hmac_sha256,
868 #else
869         NID_undef,
870 #endif
871         16,16,16,
872         EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|
873         EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
874         aesni_cbc_hmac_sha256_init_key,
875         aesni_cbc_hmac_sha256_cipher,
876         NULL,
877         sizeof(EVP_AES_HMAC_SHA256),
878         EVP_CIPH_FLAG_DEFAULT_ASN1?NULL:EVP_CIPHER_set_asn1_iv,
879         EVP_CIPH_FLAG_DEFAULT_ASN1?NULL:EVP_CIPHER_get_asn1_iv,
880         aesni_cbc_hmac_sha256_ctrl,
881         NULL
882         };
883
884 static EVP_CIPHER aesni_256_cbc_hmac_sha256_cipher =
885         {
886 #ifdef NID_aes_256_cbc_hmac_sha256
887         NID_aes_256_cbc_hmac_sha256,
888 #else
889         NID_undef,
890 #endif
891         16,32,16,
892         EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|
893         EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
894         aesni_cbc_hmac_sha256_init_key,
895         aesni_cbc_hmac_sha256_cipher,
896         NULL,
897         sizeof(EVP_AES_HMAC_SHA256),
898         EVP_CIPH_FLAG_DEFAULT_ASN1?NULL:EVP_CIPHER_set_asn1_iv,
899         EVP_CIPH_FLAG_DEFAULT_ASN1?NULL:EVP_CIPHER_get_asn1_iv,
900         aesni_cbc_hmac_sha256_ctrl,
901         NULL
902         };
903
904 const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha256(void)
905         {
906         return((OPENSSL_ia32cap_P[1]&AESNI_CAPABLE) &&
907                 aesni_cbc_sha256_enc(NULL,NULL,0,NULL,NULL,NULL,NULL) ?
908                 &aesni_128_cbc_hmac_sha256_cipher:NULL);
909         }
910
911 const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha256(void)
912         {
913         return((OPENSSL_ia32cap_P[1]&AESNI_CAPABLE) &&
914                 aesni_cbc_sha256_enc(NULL,NULL,0,NULL,NULL,NULL,NULL)?
915                 &aesni_256_cbc_hmac_sha256_cipher:NULL);
916         }
917 #else
918 const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha256(void)
919         {
920         return NULL;
921         }
922 const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha256(void)
923         {
924         return NULL;
925         }
926 #endif
927 #endif