ec76393576800637b0035dd056cf821fd8f5a412
[openssl.git] / crypto / evp / e_aes_cbc_hmac_sha1.c
1 /* ====================================================================
2  * Copyright (c) 2011-2013 The OpenSSL Project.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  *
16  * 3. All advertising materials mentioning features or use of this
17  *    software must display the following acknowledgment:
18  *    "This product includes software developed by the OpenSSL Project
19  *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
20  *
21  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22  *    endorse or promote products derived from this software without
23  *    prior written permission. For written permission, please contact
24  *    licensing@OpenSSL.org.
25  *
26  * 5. Products derived from this software may not be called "OpenSSL"
27  *    nor may "OpenSSL" appear in their names without prior written
28  *    permission of the OpenSSL Project.
29  *
30  * 6. Redistributions of any form whatsoever must retain the following
31  *    acknowledgment:
32  *    "This product includes software developed by the OpenSSL Project
33  *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
34  *
35  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
39  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46  * OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  */
49
50 #include <openssl/opensslconf.h>
51
52 #include <stdio.h>
53 #include <string.h>
54
55 #if !defined(OPENSSL_NO_AES) && !defined(OPENSSL_NO_SHA1)
56
57 #include <openssl/evp.h>
58 #include <openssl/objects.h>
59 #include <openssl/aes.h>
60 #include <openssl/sha.h>
61 #include <openssl/rand.h>
62 #include "modes_lcl.h"
63
64 #ifndef EVP_CIPH_FLAG_AEAD_CIPHER
65 #define EVP_CIPH_FLAG_AEAD_CIPHER       0x200000
66 #define EVP_CTRL_AEAD_TLS1_AAD          0x16
67 #define EVP_CTRL_AEAD_SET_MAC_KEY       0x17
68 #endif
69
70 #if !defined(EVP_CIPH_FLAG_DEFAULT_ASN1)
71 #define EVP_CIPH_FLAG_DEFAULT_ASN1 0
72 #endif
73
74 #if !defined(EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK)
75 #define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0
76 #endif
77
78 #define TLS1_1_VERSION 0x0302
79
80 typedef struct
81     {
82     AES_KEY             ks;
83     SHA_CTX             head,tail,md;
84     size_t              payload_length; /* AAD length in decrypt case */
85     union {
86         unsigned int    tls_ver;
87         unsigned char   tls_aad[16];    /* 13 used */
88     } aux;
89     } EVP_AES_HMAC_SHA1;
90
91 #define NO_PAYLOAD_LENGTH       ((size_t)-1)
92
93 #if     defined(AES_ASM) &&     ( \
94         defined(__x86_64)       || defined(__x86_64__)  || \
95         defined(_M_AMD64)       || defined(_M_X64)      || \
96         defined(__INTEL__)      )
97
98 extern unsigned int OPENSSL_ia32cap_P[3];
99 #define AESNI_CAPABLE   (1<<(57-32))
100
101 int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
102                               AES_KEY *key);
103 int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
104                               AES_KEY *key);
105
106 void aesni_cbc_encrypt(const unsigned char *in,
107                            unsigned char *out,
108                            size_t length,
109                            const AES_KEY *key,
110                            unsigned char *ivec, int enc);
111
112 void aesni_cbc_sha1_enc (const void *inp, void *out, size_t blocks,
113                 const AES_KEY *key, unsigned char iv[16],
114                 SHA_CTX *ctx,const void *in0);
115
116 void aesni256_cbc_sha1_dec (const void *inp, void *out, size_t blocks,
117                 const AES_KEY *key, unsigned char iv[16],
118                 SHA_CTX *ctx,const void *in0);
119
120 #define data(ctx) ((EVP_AES_HMAC_SHA1 *)(ctx)->cipher_data)
121
122 static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx,
123                         const unsigned char *inkey,
124                         const unsigned char *iv, int enc)
125         {
126         EVP_AES_HMAC_SHA1 *key = data(ctx);
127         int ret;
128
129         if (enc)
130                 ret=aesni_set_encrypt_key(inkey,ctx->key_len*8,&key->ks);
131         else
132                 ret=aesni_set_decrypt_key(inkey,ctx->key_len*8,&key->ks);
133
134         SHA1_Init(&key->head);  /* handy when benchmarking */
135         key->tail = key->head;
136         key->md   = key->head;
137
138         key->payload_length = NO_PAYLOAD_LENGTH;
139
140         return ret<0?0:1;
141         }
142
143 #define STITCHED_CALL
144 #undef  STITCHED_DECRYPT_CALL
145
146 #if !defined(STITCHED_CALL)
147 #define aes_off 0
148 #endif
149
150 void sha1_block_data_order (void *c,const void *p,size_t len);
151
152 static void sha1_update(SHA_CTX *c,const void *data,size_t len)
153 {       const unsigned char *ptr = data;
154         size_t res;
155
156         if ((res = c->num)) {
157                 res = SHA_CBLOCK-res;
158                 if (len<res) res=len;
159                 SHA1_Update (c,ptr,res);
160                 ptr += res;
161                 len -= res;
162         }
163
164         res = len % SHA_CBLOCK;
165         len -= res;
166
167         if (len) {
168                 sha1_block_data_order(c,ptr,len/SHA_CBLOCK);
169
170                 ptr += len;
171                 c->Nh += len>>29;
172                 c->Nl += len<<=3;
173                 if (c->Nl<(unsigned int)len) c->Nh++;
174         }
175
176         if (res)
177                 SHA1_Update(c,ptr,res);
178 }
179
180 #ifdef SHA1_Update
181 #undef SHA1_Update
182 #endif
183 #define SHA1_Update sha1_update
184
185 #if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
186
187 typedef struct { unsigned int A[8],B[8],C[8],D[8],E[8]; } SHA1_MB_CTX;
188 typedef struct { const unsigned char *ptr; int blocks;  } HASH_DESC;
189
190 void sha1_multi_block(SHA1_MB_CTX *,const HASH_DESC *,int);
191
192 typedef struct { const unsigned char *inp; unsigned char *out;
193                  int blocks; u64 iv[2]; } CIPH_DESC; 
194
195 void aesni_multi_cbc_encrypt(CIPH_DESC *,void *,int);
196
197 static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key,
198         unsigned char *out, const unsigned char *inp, size_t inp_len,
199         int n4x)        /* n4x is 1 or 2 */
200 {
201         HASH_DESC       hash_d[8], edges[8];
202         CIPH_DESC       ciph_d[8];
203         unsigned char   storage[sizeof(SHA1_MB_CTX)+32];
204         union { u64     q[16];
205                 u32     d[32];
206                 u8      c[128]; } blocks[8];
207         SHA1_MB_CTX     *ctx;
208         unsigned int    frag, last, packlen, i, x4=4*n4x, minblocks, processed=0;
209         size_t          ret = 0;
210         u8              *IVs;
211 #if defined(BSWAP8)
212         u64             seqnum;
213 #endif
214
215         if (RAND_bytes((IVs=blocks[0].c),16*x4)<=0)     /* ask for IVs in bulk */
216                 return 0;
217
218         ctx = (SHA1_MB_CTX *)(storage+32-((size_t)storage%32)); /* align */
219
220         frag = (unsigned int)inp_len>>(1+n4x);
221         last = (unsigned int)inp_len+frag-(frag<<(1+n4x));
222         if (last>frag && ((last+13+9)%64)<(x4-1)) {
223                 frag++;
224                 last -= x4-1;
225         }
226
227         packlen = 5+16+((frag+20+16)&-16);
228
229         /* populate descriptors with pointers and IVs */
230         hash_d[0].ptr = inp;
231         ciph_d[0].inp = inp;
232         ciph_d[0].out = out+5+16;       /* 5+16 is place for header and explicit IV */
233         memcpy(ciph_d[0].out-16,IVs,16);
234         memcpy(ciph_d[0].iv,IVs,16);    IVs += 16;
235
236         for (i=1;i<x4;i++) {
237                 ciph_d[i].inp = hash_d[i].ptr = hash_d[i-1].ptr+frag;
238                 ciph_d[i].out = ciph_d[i-1].out+packlen;
239                 memcpy(ciph_d[i].out-16,IVs,16);
240                 memcpy(ciph_d[i].iv,IVs,16);    IVs+=16;
241         }
242
243 #if defined(BSWAP8)
244         memcpy(blocks[0].c,key->md.data,8);
245         seqnum = BSWAP8(blocks[0].q[0]);
246 #endif
247         for (i=0;i<x4;i++) {
248                 unsigned int len = (i==(x4-1)?last:frag);
249 #if !defined(BSWAP8)
250                 unsigned int carry, j;
251 #endif
252
253                 ctx->A[i] = key->md.h0;
254                 ctx->B[i] = key->md.h1;
255                 ctx->C[i] = key->md.h2;
256                 ctx->D[i] = key->md.h3;
257                 ctx->E[i] = key->md.h4;
258
259                 /* fix seqnum */
260 #if defined(BSWAP8)
261                 blocks[i].q[0] = BSWAP8(seqnum+i);
262 #else
263                 for (carry=i,j=8;j--;) {
264                         blocks[i].c[j] = ((u8*)key->md.data)[j]+carry;
265                         carry = (blocks[i].c[j]-carry)>>(sizeof(carry)*8-1);
266                 }
267 #endif
268                 blocks[i].c[8] = ((u8*)key->md.data)[8];
269                 blocks[i].c[9] = ((u8*)key->md.data)[9];
270                 blocks[i].c[10] = ((u8*)key->md.data)[10];
271                 /* fix length */
272                 blocks[i].c[11] = (u8)(len>>8);
273                 blocks[i].c[12] = (u8)(len);
274
275                 memcpy(blocks[i].c+13,hash_d[i].ptr,64-13);
276                 hash_d[i].ptr += 64-13;
277                 hash_d[i].blocks = (len-(64-13))/64;
278
279                 edges[i].ptr = blocks[i].c;
280                 edges[i].blocks = 1;
281         }
282
283         /* hash 13-byte headers and first 64-13 bytes of inputs */
284         sha1_multi_block(ctx,edges,n4x);
285         /* hash bulk inputs */
286 #define MAXCHUNKSIZE    2048
287 #if     MAXCHUNKSIZE%64
288 #error  "MAXCHUNKSIZE is not divisible by 64"
289 #elif   MAXCHUNKSIZE
290         /* goal is to minimize pressure on L1 cache by moving
291          * in shorter steps, so that hashed data is still in
292          * the cache by the time we encrypt it */
293         minblocks = ((frag<=last ? frag : last)-(64-13))/64;
294         if (minblocks>MAXCHUNKSIZE/64) {
295                 for (i=0;i<x4;i++) {
296                         edges[i].ptr     = hash_d[i].ptr;
297                         edges[i].blocks  = MAXCHUNKSIZE/64;
298                         ciph_d[i].blocks = MAXCHUNKSIZE/16;
299                 }
300                 do {
301                         sha1_multi_block(ctx,edges,n4x);
302                         aesni_multi_cbc_encrypt(ciph_d,&key->ks,n4x);
303
304                         for (i=0;i<x4;i++) {
305                                 edges[i].ptr     = hash_d[i].ptr += MAXCHUNKSIZE;
306                                 hash_d[i].blocks -= MAXCHUNKSIZE/64;
307                                 edges[i].blocks  = MAXCHUNKSIZE/64;
308                                 ciph_d[i].inp    += MAXCHUNKSIZE;
309                                 ciph_d[i].out    += MAXCHUNKSIZE;
310                                 ciph_d[i].blocks = MAXCHUNKSIZE/16;
311                                 memcpy(ciph_d[i].iv,ciph_d[i].out-16,16);
312                         }
313                         processed += MAXCHUNKSIZE;
314                         minblocks -= MAXCHUNKSIZE/64;
315                 } while (minblocks>MAXCHUNKSIZE/64);
316         }
317 #endif
318 #undef  MAXCHUNKSIZE
319         sha1_multi_block(ctx,hash_d,n4x);
320
321         memset(blocks,0,sizeof(blocks));
322         for (i=0;i<x4;i++) {
323                 unsigned int            len = (i==(x4-1)?last:frag),
324                                         off = hash_d[i].blocks*64;
325                 const unsigned char    *ptr = hash_d[i].ptr+off;
326
327                 off = (len-processed)-(64-13)-off;      /* remainder actually */
328                 memcpy(blocks[i].c,ptr,off);
329                 blocks[i].c[off]=0x80;
330                 len += 64+13;           /* 64 is HMAC header */
331                 len *= 8;               /* convert to bits */
332                 if (off<(64-8)) {
333 #ifdef BSWAP4
334                         blocks[i].d[15] = BSWAP4(len);
335 #else
336                         PUTU32(blocks[i].c+60,len);
337 #endif
338                         edges[i].blocks = 1;                    
339                 } else {
340 #ifdef BSWAP4
341                         blocks[i].d[31] = BSWAP4(len);
342 #else
343                         PUTU32(blocks[i].c+124,len);
344 #endif
345                         edges[i].blocks = 2;
346                 }
347                 edges[i].ptr = blocks[i].c;
348         }
349
350         /* hash input tails and finalize */
351         sha1_multi_block(ctx,edges,n4x);
352
353         memset(blocks,0,sizeof(blocks));
354         for (i=0;i<x4;i++) {
355 #ifdef BSWAP4
356                 blocks[i].d[0] = BSWAP4(ctx->A[i]);     ctx->A[i] = key->tail.h0;
357                 blocks[i].d[1] = BSWAP4(ctx->B[i]);     ctx->B[i] = key->tail.h1;
358                 blocks[i].d[2] = BSWAP4(ctx->C[i]);     ctx->C[i] = key->tail.h2;
359                 blocks[i].d[3] = BSWAP4(ctx->D[i]);     ctx->D[i] = key->tail.h3;
360                 blocks[i].d[4] = BSWAP4(ctx->E[i]);     ctx->E[i] = key->tail.h4;
361                 blocks[i].c[20] = 0x80;
362                 blocks[i].d[15] = BSWAP4((64+20)*8);
363 #else
364                 PUTU32(blocks[i].c+0,ctx->A[i]);        ctx->A[i] = key->tail.h0;
365                 PUTU32(blocks[i].c+4,ctx->B[i]);        ctx->B[i] = key->tail.h1;
366                 PUTU32(blocks[i].c+8,ctx->C[i]);        ctx->C[i] = key->tail.h2;
367                 PUTU32(blocks[i].c+12,ctx->D[i]);       ctx->D[i] = key->tail.h3;
368                 PUTU32(blocks[i].c+16,ctx->E[i]);       ctx->E[i] = key->tail.h4;
369                 blocks[i].c[20] = 0x80;
370                 PUTU32(blocks[i].c+60,(64+20)*8);
371 #endif
372                 edges[i].ptr = blocks[i].c;
373                 edges[i].blocks = 1;
374         }
375
376         /* finalize MACs */
377         sha1_multi_block(ctx,edges,n4x);
378
379         for (i=0;i<x4;i++) {
380                 unsigned int len = (i==(x4-1)?last:frag), pad, j;
381                 unsigned char *out0 = out;
382
383                 memcpy(ciph_d[i].out,ciph_d[i].inp,len-processed);
384                 ciph_d[i].inp = ciph_d[i].out;
385
386                 out += 5+16+len;
387
388                 /* write MAC */
389                 PUTU32(out+0,ctx->A[i]);
390                 PUTU32(out+4,ctx->B[i]);
391                 PUTU32(out+8,ctx->C[i]);
392                 PUTU32(out+12,ctx->D[i]);
393                 PUTU32(out+16,ctx->E[i]);
394                 out += 20;
395                 len += 20;
396
397                 /* pad */
398                 pad = 15-len%16;
399                 for (j=0;j<=pad;j++) *(out++) = pad;
400                 len += pad+1;
401
402                 ciph_d[i].blocks = (len-processed)/16;
403                 len += 16;      /* account for explicit iv */
404
405                 /* arrange header */
406                 out0[0] = ((u8*)key->md.data)[8];
407                 out0[1] = ((u8*)key->md.data)[9];
408                 out0[2] = ((u8*)key->md.data)[10];
409                 out0[3] = (u8)(len>>8);
410                 out0[4] = (u8)(len);
411
412                 ret += len+5;
413                 inp += frag;
414         }
415
416         aesni_multi_cbc_encrypt(ciph_d,&key->ks,n4x);
417
418         OPENSSL_cleanse(blocks,sizeof(blocks));
419         OPENSSL_cleanse(ctx,sizeof(*ctx));
420
421         return ret;
422 }
423 #endif
424
425 static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
426                       const unsigned char *in, size_t len)
427         {
428         EVP_AES_HMAC_SHA1 *key = data(ctx);
429         unsigned int l;
430         size_t  plen = key->payload_length,
431                 iv = 0,         /* explicit IV in TLS 1.1 and later */
432                 sha_off = 0;
433 #if defined(STITCHED_CALL)
434         size_t  aes_off = 0,
435                 blocks;
436
437         sha_off = SHA_CBLOCK-key->md.num;
438 #endif
439
440         key->payload_length = NO_PAYLOAD_LENGTH;
441
442         if (len%AES_BLOCK_SIZE) return 0;
443
444         if (ctx->encrypt) {
445                 if (plen==NO_PAYLOAD_LENGTH)
446                         plen = len;
447                 else if (len!=((plen+SHA_DIGEST_LENGTH+AES_BLOCK_SIZE)&-AES_BLOCK_SIZE))
448                         return 0;
449                 else if (key->aux.tls_ver >= TLS1_1_VERSION)
450                         iv = AES_BLOCK_SIZE;
451
452 #if defined(STITCHED_CALL)
453                 if (plen>(sha_off+iv) && (blocks=(plen-(sha_off+iv))/SHA_CBLOCK)) {
454                         SHA1_Update(&key->md,in+iv,sha_off);
455
456                         aesni_cbc_sha1_enc(in,out,blocks,&key->ks,
457                                 ctx->iv,&key->md,in+iv+sha_off);
458                         blocks *= SHA_CBLOCK;
459                         aes_off += blocks;
460                         sha_off += blocks;
461                         key->md.Nh += blocks>>29;
462                         key->md.Nl += blocks<<=3;
463                         if (key->md.Nl<(unsigned int)blocks) key->md.Nh++;
464                 } else {
465                         sha_off = 0;
466                 }
467 #endif
468                 sha_off += iv;
469                 SHA1_Update(&key->md,in+sha_off,plen-sha_off);
470
471                 if (plen!=len)  {       /* "TLS" mode of operation */
472                         if (in!=out)
473                                 memcpy(out+aes_off,in+aes_off,plen-aes_off);
474
475                         /* calculate HMAC and append it to payload */
476                         SHA1_Final(out+plen,&key->md);
477                         key->md = key->tail;
478                         SHA1_Update(&key->md,out+plen,SHA_DIGEST_LENGTH);
479                         SHA1_Final(out+plen,&key->md);
480
481                         /* pad the payload|hmac */
482                         plen += SHA_DIGEST_LENGTH;
483                         for (l=len-plen-1;plen<len;plen++) out[plen]=l;
484                         /* encrypt HMAC|padding at once */
485                         aesni_cbc_encrypt(out+aes_off,out+aes_off,len-aes_off,
486                                         &key->ks,ctx->iv,1);
487                 } else {
488                         aesni_cbc_encrypt(in+aes_off,out+aes_off,len-aes_off,
489                                         &key->ks,ctx->iv,1);
490                 }
491         } else {
492                 union { unsigned int  u[SHA_DIGEST_LENGTH/sizeof(unsigned int)];
493                         unsigned char c[32+SHA_DIGEST_LENGTH]; } mac, *pmac;
494
495                 /* arrange cache line alignment */
496                 pmac = (void *)(((size_t)mac.c+31)&((size_t)0-32));
497
498                 if (plen != NO_PAYLOAD_LENGTH) {        /* "TLS" mode of operation */
499                         size_t inp_len, mask, j, i;
500                         unsigned int res, maxpad, pad, bitlen;
501                         int ret = 1;
502                         union { unsigned int  u[SHA_LBLOCK];
503                                 unsigned char c[SHA_CBLOCK]; }
504                                 *data = (void *)key->md.data;
505 #if defined(STITCHED_DECRYPT_CALL)
506                         unsigned char tail_iv[AES_BLOCK_SIZE];
507                         int stitch=0;
508 #endif
509
510                         if ((key->aux.tls_aad[plen-4]<<8|key->aux.tls_aad[plen-3])
511                             >= TLS1_1_VERSION) {
512                                 if (len<(AES_BLOCK_SIZE+SHA_DIGEST_LENGTH+1))
513                                         return 0;
514
515                                 /* omit explicit iv */
516                                 memcpy(ctx->iv,in,AES_BLOCK_SIZE);
517                                 in  += AES_BLOCK_SIZE;
518                                 out += AES_BLOCK_SIZE;
519                                 len -= AES_BLOCK_SIZE;
520                         }
521                         else if (len<(SHA_DIGEST_LENGTH+1))
522                                 return 0;
523
524 #if defined(STITCHED_DECRYPT_CALL)
525                         if (len>=1024 && ctx->key_len==32) {
526                                 /* decrypt last block */
527                                 memcpy(tail_iv,in+len-2*AES_BLOCK_SIZE,AES_BLOCK_SIZE);
528                                 aesni_cbc_encrypt(in+len-AES_BLOCK_SIZE,
529                                                 out+len-AES_BLOCK_SIZE,AES_BLOCK_SIZE,
530                                                 &key->ks,tail_iv,0);
531                                 stitch=1;
532                         } else
533 #endif
534                         /* decrypt HMAC|padding at once */
535                         aesni_cbc_encrypt(in,out,len,
536                                         &key->ks,ctx->iv,0);
537
538                         /* figure out payload length */
539                         pad = out[len-1];
540                         maxpad = len-(SHA_DIGEST_LENGTH+1);
541                         maxpad |= (255-maxpad)>>(sizeof(maxpad)*8-8);
542                         maxpad &= 255;
543
544                         inp_len = len - (SHA_DIGEST_LENGTH+pad+1);
545                         mask = (0-((inp_len-len)>>(sizeof(inp_len)*8-1)));
546                         inp_len &= mask;
547                         ret &= (int)mask;
548
549                         key->aux.tls_aad[plen-2] = inp_len>>8;
550                         key->aux.tls_aad[plen-1] = inp_len;
551
552                         /* calculate HMAC */
553                         key->md = key->head;
554                         SHA1_Update(&key->md,key->aux.tls_aad,plen);
555
556 #if defined(STITCHED_DECRYPT_CALL)
557                         if (stitch) {
558                                 blocks = (len-(256+32+SHA_CBLOCK))/SHA_CBLOCK;
559                                 aes_off = len-AES_BLOCK_SIZE-blocks*SHA_CBLOCK;
560                                 sha_off = SHA_CBLOCK-plen;
561
562                                 aesni_cbc_encrypt(in,out,aes_off,
563                                         &key->ks,ctx->iv,0);
564
565                                 SHA1_Update(&key->md,out,sha_off);
566                                 aesni256_cbc_sha1_dec(in+aes_off,
567                                         out+aes_off,blocks,&key->ks,ctx->iv,
568                                         &key->md,out+sha_off);
569
570                                 sha_off += blocks*=SHA_CBLOCK;
571                                 out += sha_off;
572                                 len -= sha_off;
573                                 inp_len -= sha_off;
574
575                                 key->md.Nl += (blocks<<3);      /* at most 18 bits */
576                                 memcpy(ctx->iv,tail_iv,AES_BLOCK_SIZE);
577                         }
578 #endif
579
580 #if 1
581                         len -= SHA_DIGEST_LENGTH;               /* amend mac */
582                         if (len>=(256+SHA_CBLOCK)) {
583                                 j = (len-(256+SHA_CBLOCK))&(0-SHA_CBLOCK);
584                                 j += SHA_CBLOCK-key->md.num;
585                                 SHA1_Update(&key->md,out,j);
586                                 out += j;
587                                 len -= j;
588                                 inp_len -= j;
589                         }
590
591                         /* but pretend as if we hashed padded payload */
592                         bitlen = key->md.Nl+(inp_len<<3);       /* at most 18 bits */
593 #ifdef BSWAP4
594                         bitlen = BSWAP4(bitlen);
595 #else
596                         mac.c[0] = 0;
597                         mac.c[1] = (unsigned char)(bitlen>>16);
598                         mac.c[2] = (unsigned char)(bitlen>>8);
599                         mac.c[3] = (unsigned char)bitlen;
600                         bitlen = mac.u[0];
601 #endif
602
603                         pmac->u[0]=0;
604                         pmac->u[1]=0;
605                         pmac->u[2]=0;
606                         pmac->u[3]=0;
607                         pmac->u[4]=0;
608
609                         for (res=key->md.num, j=0;j<len;j++) {
610                                 size_t c = out[j];
611                                 mask = (j-inp_len)>>(sizeof(j)*8-8);
612                                 c &= mask;
613                                 c |= 0x80&~mask&~((inp_len-j)>>(sizeof(j)*8-8));
614                                 data->c[res++]=(unsigned char)c;
615
616                                 if (res!=SHA_CBLOCK) continue;
617
618                                 /* j is not incremented yet */
619                                 mask = 0-((inp_len+7-j)>>(sizeof(j)*8-1));
620                                 data->u[SHA_LBLOCK-1] |= bitlen&mask;
621                                 sha1_block_data_order(&key->md,data,1);
622                                 mask &= 0-((j-inp_len-72)>>(sizeof(j)*8-1));
623                                 pmac->u[0] |= key->md.h0 & mask;
624                                 pmac->u[1] |= key->md.h1 & mask;
625                                 pmac->u[2] |= key->md.h2 & mask;
626                                 pmac->u[3] |= key->md.h3 & mask;
627                                 pmac->u[4] |= key->md.h4 & mask;
628                                 res=0;
629                         }
630
631                         for(i=res;i<SHA_CBLOCK;i++,j++) data->c[i]=0;
632
633                         if (res>SHA_CBLOCK-8) {
634                                 mask = 0-((inp_len+8-j)>>(sizeof(j)*8-1));
635                                 data->u[SHA_LBLOCK-1] |= bitlen&mask;
636                                 sha1_block_data_order(&key->md,data,1);
637                                 mask &= 0-((j-inp_len-73)>>(sizeof(j)*8-1));
638                                 pmac->u[0] |= key->md.h0 & mask;
639                                 pmac->u[1] |= key->md.h1 & mask;
640                                 pmac->u[2] |= key->md.h2 & mask;
641                                 pmac->u[3] |= key->md.h3 & mask;
642                                 pmac->u[4] |= key->md.h4 & mask;
643
644                                 memset(data,0,SHA_CBLOCK);
645                                 j+=64;
646                         }
647                         data->u[SHA_LBLOCK-1] = bitlen;
648                         sha1_block_data_order(&key->md,data,1);
649                         mask = 0-((j-inp_len-73)>>(sizeof(j)*8-1));
650                         pmac->u[0] |= key->md.h0 & mask;
651                         pmac->u[1] |= key->md.h1 & mask;
652                         pmac->u[2] |= key->md.h2 & mask;
653                         pmac->u[3] |= key->md.h3 & mask;
654                         pmac->u[4] |= key->md.h4 & mask;
655
656 #ifdef BSWAP4
657                         pmac->u[0] = BSWAP4(pmac->u[0]);
658                         pmac->u[1] = BSWAP4(pmac->u[1]);
659                         pmac->u[2] = BSWAP4(pmac->u[2]);
660                         pmac->u[3] = BSWAP4(pmac->u[3]);
661                         pmac->u[4] = BSWAP4(pmac->u[4]);
662 #else
663                         for (i=0;i<5;i++) {
664                                 res = pmac->u[i];
665                                 pmac->c[4*i+0]=(unsigned char)(res>>24);
666                                 pmac->c[4*i+1]=(unsigned char)(res>>16);
667                                 pmac->c[4*i+2]=(unsigned char)(res>>8);
668                                 pmac->c[4*i+3]=(unsigned char)res;
669                         }
670 #endif
671                         len += SHA_DIGEST_LENGTH;
672 #else
673                         SHA1_Update(&key->md,out,inp_len);
674                         res = key->md.num;
675                         SHA1_Final(pmac->c,&key->md);
676
677                         {
678                         unsigned int inp_blocks, pad_blocks;
679
680                         /* but pretend as if we hashed padded payload */
681                         inp_blocks = 1+((SHA_CBLOCK-9-res)>>(sizeof(res)*8-1));
682                         res += (unsigned int)(len-inp_len);
683                         pad_blocks = res / SHA_CBLOCK;
684                         res %= SHA_CBLOCK;
685                         pad_blocks += 1+((SHA_CBLOCK-9-res)>>(sizeof(res)*8-1));
686                         for (;inp_blocks<pad_blocks;inp_blocks++)
687                                 sha1_block_data_order(&key->md,data,1);
688                         }
689 #endif
690                         key->md = key->tail;
691                         SHA1_Update(&key->md,pmac->c,SHA_DIGEST_LENGTH);
692                         SHA1_Final(pmac->c,&key->md);
693
694                         /* verify HMAC */
695                         out += inp_len;
696                         len -= inp_len;
697 #if 1
698                         {
699                         unsigned char *p = out+len-1-maxpad-SHA_DIGEST_LENGTH;
700                         size_t off = out-p;
701                         unsigned int c, cmask;
702
703                         maxpad += SHA_DIGEST_LENGTH;
704                         for (res=0,i=0,j=0;j<maxpad;j++) {
705                                 c = p[j];
706                                 cmask = ((int)(j-off-SHA_DIGEST_LENGTH))>>(sizeof(int)*8-1);
707                                 res |= (c^pad)&~cmask;  /* ... and padding */
708                                 cmask &= ((int)(off-1-j))>>(sizeof(int)*8-1);
709                                 res |= (c^pmac->c[i])&cmask;
710                                 i += 1&cmask;
711                         }
712                         maxpad -= SHA_DIGEST_LENGTH;
713
714                         res = 0-((0-res)>>(sizeof(res)*8-1));
715                         ret &= (int)~res;
716                         }
717 #else
718                         for (res=0,i=0;i<SHA_DIGEST_LENGTH;i++)
719                                 res |= out[i]^pmac->c[i];
720                         res = 0-((0-res)>>(sizeof(res)*8-1));
721                         ret &= (int)~res;
722
723                         /* verify padding */
724                         pad = (pad&~res) | (maxpad&res);
725                         out = out+len-1-pad;
726                         for (res=0,i=0;i<pad;i++)
727                                 res |= out[i]^pad;
728
729                         res = (0-res)>>(sizeof(res)*8-1);
730                         ret &= (int)~res;
731 #endif
732                         return ret;
733                 } else {
734 #if defined(STITCHED_DECRYPT_CALL)
735                         if (len>=1024 && ctx->key_len==32) {
736                                 if (sha_off%=SHA_CBLOCK)
737                                         blocks = (len-3*SHA_CBLOCK)/SHA_CBLOCK;
738                                 else
739                                         blocks = (len-2*SHA_CBLOCK)/SHA_CBLOCK;
740                                 aes_off = len-blocks*SHA_CBLOCK;
741
742                                 aesni_cbc_encrypt(in,out,aes_off,
743                                         &key->ks,ctx->iv,0);
744                                 SHA1_Update(&key->md,out,sha_off);
745                                 aesni256_cbc_sha1_dec(in+aes_off,
746                                         out+aes_off,blocks,&key->ks,ctx->iv,
747                                         &key->md,out+sha_off);
748
749                                 sha_off += blocks*=SHA_CBLOCK;
750                                 out += sha_off;
751                                 len -= sha_off;
752
753                                 key->md.Nh += blocks>>29;
754                                 key->md.Nl += blocks<<=3;
755                                 if (key->md.Nl<(unsigned int)blocks) key->md.Nh++;
756                         } else
757 #endif
758                         /* decrypt HMAC|padding at once */
759                         aesni_cbc_encrypt(in,out,len,
760                                         &key->ks,ctx->iv,0);
761
762                         SHA1_Update(&key->md,out,len);
763                 }
764         }
765
766         return 1;
767         }
768
769 static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void *ptr)
770         {
771         EVP_AES_HMAC_SHA1 *key = data(ctx);
772
773         switch (type)
774                 {
775         case EVP_CTRL_AEAD_SET_MAC_KEY:
776                 {
777                 unsigned int  i;
778                 unsigned char hmac_key[64];
779
780                 memset (hmac_key,0,sizeof(hmac_key));
781
782                 if (arg > (int)sizeof(hmac_key)) {
783                         SHA1_Init(&key->head);
784                         SHA1_Update(&key->head,ptr,arg);
785                         SHA1_Final(hmac_key,&key->head);
786                 } else {
787                         memcpy(hmac_key,ptr,arg);
788                 }
789
790                 for (i=0;i<sizeof(hmac_key);i++)
791                         hmac_key[i] ^= 0x36;            /* ipad */
792                 SHA1_Init(&key->head);
793                 SHA1_Update(&key->head,hmac_key,sizeof(hmac_key));
794
795                 for (i=0;i<sizeof(hmac_key);i++)
796                         hmac_key[i] ^= 0x36^0x5c;       /* opad */
797                 SHA1_Init(&key->tail);
798                 SHA1_Update(&key->tail,hmac_key,sizeof(hmac_key));
799
800                 OPENSSL_cleanse(hmac_key,sizeof(hmac_key));
801
802                 return 1;
803                 }
804         case EVP_CTRL_AEAD_TLS1_AAD:
805                 {
806                 unsigned char *p=ptr;
807                 unsigned int   len=p[arg-2]<<8|p[arg-1];
808
809                 if (ctx->encrypt)
810                         {
811                         key->payload_length = len;
812                         if ((key->aux.tls_ver=p[arg-4]<<8|p[arg-3]) >= TLS1_1_VERSION) {
813                                 len -= AES_BLOCK_SIZE;
814                                 p[arg-2] = len>>8;
815                                 p[arg-1] = len;
816                         }
817                         key->md = key->head;
818                         SHA1_Update(&key->md,p,arg);
819
820                         return (int)(((len+SHA_DIGEST_LENGTH+AES_BLOCK_SIZE)&-AES_BLOCK_SIZE)
821                                 - len);
822                         }
823                 else
824                         {
825                         if (arg>13) arg = 13;
826                         memcpy(key->aux.tls_aad,ptr,arg);
827                         key->payload_length = arg;
828
829                         return SHA_DIGEST_LENGTH;
830                         }
831                 }
832 #if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
833         case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE:
834                 return (int)(5+16+((arg+20+16)&-16));
835         case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD:
836                 {
837                 EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
838                         (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *)ptr;
839                 unsigned int n4x=1, x4;
840                 unsigned int frag, last, packlen, inp_len;
841
842                 if (arg<(int)sizeof(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM)) return -1;
843
844                 inp_len = param->inp[11]<<8|param->inp[12];
845
846                 if (ctx->encrypt)
847                         {
848                         if ((param->inp[9]<<8|param->inp[10]) < TLS1_1_VERSION)
849                                 return -1;
850
851                         if (inp_len)
852                                 {
853                                 if (inp_len<4096) return 0;     /* too short */
854
855                                 if (inp_len>=8192 && OPENSSL_ia32cap_P[2]&(1<<5))
856                                         n4x=2;  /* AVX2 */
857                                 }
858                         else if ((n4x=param->interleave/4) && n4x<=2)
859                                 inp_len = param->len;
860                         else
861                                 return -1;
862
863                         key->md = key->head;
864                         SHA1_Update(&key->md,param->inp,13);
865
866                         x4 = 4*n4x; n4x += 1;
867
868                         frag = inp_len>>n4x;
869                         last = inp_len+frag-(frag<<n4x);
870                         if (last>frag && ((last+13+9)%64<(x4-1))) {
871                                 frag++;
872                                 last -= x4-1;
873                         }
874
875                         packlen = 5+16+((frag+20+16)&-16);
876                         packlen = (packlen<<n4x)-packlen;
877                         packlen += 5+16+((last+20+16)&-16);
878
879                         param->interleave = x4;
880
881                         return (int)packlen;
882                         }
883                 else
884                         return -1;      /* not yet */
885                 }
886         case EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT:
887                 {
888                 EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
889                         (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *)ptr;
890
891                 return (int)tls1_1_multi_block_encrypt(key,param->out,param->inp,
892                                                 param->len,param->interleave/4);
893                 }
894         case EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT:
895 #endif
896         default:
897                 return -1;
898                 }
899         }
900
901 static EVP_CIPHER aesni_128_cbc_hmac_sha1_cipher =
902         {
903 #ifdef NID_aes_128_cbc_hmac_sha1
904         NID_aes_128_cbc_hmac_sha1,
905 #else
906         NID_undef,
907 #endif
908         16,16,16,
909         EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|
910         EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
911         aesni_cbc_hmac_sha1_init_key,
912         aesni_cbc_hmac_sha1_cipher,
913         NULL,
914         sizeof(EVP_AES_HMAC_SHA1),
915         EVP_CIPH_FLAG_DEFAULT_ASN1?NULL:EVP_CIPHER_set_asn1_iv,
916         EVP_CIPH_FLAG_DEFAULT_ASN1?NULL:EVP_CIPHER_get_asn1_iv,
917         aesni_cbc_hmac_sha1_ctrl,
918         NULL
919         };
920
921 static EVP_CIPHER aesni_256_cbc_hmac_sha1_cipher =
922         {
923 #ifdef NID_aes_256_cbc_hmac_sha1
924         NID_aes_256_cbc_hmac_sha1,
925 #else
926         NID_undef,
927 #endif
928         16,32,16,
929         EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|
930         EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
931         aesni_cbc_hmac_sha1_init_key,
932         aesni_cbc_hmac_sha1_cipher,
933         NULL,
934         sizeof(EVP_AES_HMAC_SHA1),
935         EVP_CIPH_FLAG_DEFAULT_ASN1?NULL:EVP_CIPHER_set_asn1_iv,
936         EVP_CIPH_FLAG_DEFAULT_ASN1?NULL:EVP_CIPHER_get_asn1_iv,
937         aesni_cbc_hmac_sha1_ctrl,
938         NULL
939         };
940
941 const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha1(void)
942         {
943         return(OPENSSL_ia32cap_P[1]&AESNI_CAPABLE?
944                 &aesni_128_cbc_hmac_sha1_cipher:NULL);
945         }
946
947 const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha1(void)
948         {
949         return(OPENSSL_ia32cap_P[1]&AESNI_CAPABLE?
950                 &aesni_256_cbc_hmac_sha1_cipher:NULL);
951         }
952 #else
953 const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha1(void)
954         {
955         return NULL;
956         }
957 const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha1(void)
958         {
959         return NULL;
960         }
961 #endif
962 #endif