crypto/evp/e_aes_cbc_hmac_sha[1|256].c: fix compiler warnings.
[openssl.git] / crypto / evp / e_aes_cbc_hmac_sha1.c
index 15e9638..ec76393 100644 (file)
@@ -113,6 +113,10 @@ void aesni_cbc_sha1_enc (const void *inp, void *out, size_t blocks,
                const AES_KEY *key, unsigned char iv[16],
                SHA_CTX *ctx,const void *in0);
 
+void aesni256_cbc_sha1_dec (const void *inp, void *out, size_t blocks,
+               const AES_KEY *key, unsigned char iv[16],
+               SHA_CTX *ctx,const void *in0);
+
 #define data(ctx) ((EVP_AES_HMAC_SHA1 *)(ctx)->cipher_data)
 
 static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx,
@@ -137,6 +141,7 @@ static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx,
        }
 
 #define        STITCHED_CALL
+#undef STITCHED_DECRYPT_CALL
 
 #if !defined(STITCHED_CALL)
 #define        aes_off 0
@@ -200,9 +205,15 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key,
                u32     d[32];
                u8      c[128]; } blocks[8];
        SHA1_MB_CTX     *ctx;
-       unsigned int    frag, last, packlen, i, x4=4*n4x;
+       unsigned int    frag, last, packlen, i, x4=4*n4x, minblocks, processed=0;
        size_t          ret = 0;
        u8              *IVs;
+#if defined(BSWAP8)
+       u64             seqnum;
+#endif
+
+       if (RAND_bytes((IVs=blocks[0].c),16*x4)<=0)     /* ask for IVs in bulk */
+               return 0;
 
        ctx = (SHA1_MB_CTX *)(storage+32-((size_t)storage%32)); /* align */
 
@@ -213,11 +224,31 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key,
                last -= x4-1;
        }
 
+       packlen = 5+16+((frag+20+16)&-16);
+
+       /* populate descriptors with pointers and IVs */
        hash_d[0].ptr = inp;
-       for (i=1;i<x4;i++)      hash_d[i].ptr = hash_d[i-1].ptr+frag;
+       ciph_d[0].inp = inp;
+       ciph_d[0].out = out+5+16;       /* 5+16 is place for header and explicit IV */
+       memcpy(ciph_d[0].out-16,IVs,16);
+       memcpy(ciph_d[0].iv,IVs,16);    IVs += 16;
+
+       for (i=1;i<x4;i++) {
+               ciph_d[i].inp = hash_d[i].ptr = hash_d[i-1].ptr+frag;
+               ciph_d[i].out = ciph_d[i-1].out+packlen;
+               memcpy(ciph_d[i].out-16,IVs,16);
+               memcpy(ciph_d[i].iv,IVs,16);    IVs+=16;
+       }
 
+#if defined(BSWAP8)
+       memcpy(blocks[0].c,key->md.data,8);
+       seqnum = BSWAP8(blocks[0].q[0]);
+#endif
        for (i=0;i<x4;i++) {
                unsigned int len = (i==(x4-1)?last:frag);
+#if !defined(BSWAP8)
+               unsigned int carry, j;
+#endif
 
                ctx->A[i] = key->md.h0;
                ctx->B[i] = key->md.h1;
@@ -227,15 +258,11 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key,
 
                /* fix seqnum */
 #if defined(BSWAP8)
-               blocks[i].q[0] = BSWAP8(BSWAP8(*(u64*)key->md.data)+i);
+               blocks[i].q[0] = BSWAP8(seqnum+i);
 #else
-               blocks[i].c[7] += ((u8*)key->md.data)[7]+i;
-               if (blocks[i].c[7] < i) {
-                       int j;
-
-                       for (j=6;j>=0;j--) {
-                               if (blocks[i].c[j]=((u8*)key->md.data)[j]+1) break;
-                       }
+               for (carry=i,j=8;j--;) {
+                       blocks[i].c[j] = ((u8*)key->md.data)[j]+carry;
+                       carry = (blocks[i].c[j]-carry)>>(sizeof(carry)*8-1);
                }
 #endif
                blocks[i].c[8] = ((u8*)key->md.data)[8];
@@ -256,6 +283,39 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key,
        /* hash 13-byte headers and first 64-13 bytes of inputs */
        sha1_multi_block(ctx,edges,n4x);
        /* hash bulk inputs */
+#define        MAXCHUNKSIZE    2048
+#if    MAXCHUNKSIZE%64
+#error "MAXCHUNKSIZE is not divisible by 64"
+#elif  MAXCHUNKSIZE
+       /* goal is to minimize pressure on L1 cache by moving
+        * in shorter steps, so that hashed data is still in
+        * the cache by the time we encrypt it */
+       minblocks = ((frag<=last ? frag : last)-(64-13))/64;
+       if (minblocks>MAXCHUNKSIZE/64) {
+               for (i=0;i<x4;i++) {
+                       edges[i].ptr     = hash_d[i].ptr;
+                       edges[i].blocks  = MAXCHUNKSIZE/64;
+                       ciph_d[i].blocks = MAXCHUNKSIZE/16;
+               }
+               do {
+                       sha1_multi_block(ctx,edges,n4x);
+                       aesni_multi_cbc_encrypt(ciph_d,&key->ks,n4x);
+
+                       for (i=0;i<x4;i++) {
+                               edges[i].ptr     = hash_d[i].ptr += MAXCHUNKSIZE;
+                               hash_d[i].blocks -= MAXCHUNKSIZE/64;
+                               edges[i].blocks  = MAXCHUNKSIZE/64;
+                               ciph_d[i].inp    += MAXCHUNKSIZE;
+                               ciph_d[i].out    += MAXCHUNKSIZE;
+                               ciph_d[i].blocks = MAXCHUNKSIZE/16;
+                               memcpy(ciph_d[i].iv,ciph_d[i].out-16,16);
+                       }
+                       processed += MAXCHUNKSIZE;
+                       minblocks -= MAXCHUNKSIZE/64;
+               } while (minblocks>MAXCHUNKSIZE/64);
+       }
+#endif
+#undef MAXCHUNKSIZE
        sha1_multi_block(ctx,hash_d,n4x);
 
        memset(blocks,0,sizeof(blocks));
@@ -264,16 +324,24 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key,
                                        off = hash_d[i].blocks*64;
                const unsigned char    *ptr = hash_d[i].ptr+off;
 
-               off = len-(64-13)-off;  /* remainder actually */
+               off = (len-processed)-(64-13)-off;      /* remainder actually */
                memcpy(blocks[i].c,ptr,off);
                blocks[i].c[off]=0x80;
                len += 64+13;           /* 64 is HMAC header */
                len *= 8;               /* convert to bits */
                if (off<(64-8)) {
+#ifdef BSWAP4
                        blocks[i].d[15] = BSWAP4(len);
+#else
+                       PUTU32(blocks[i].c+60,len);
+#endif
                        edges[i].blocks = 1;                    
                } else {
+#ifdef BSWAP4
                        blocks[i].d[31] = BSWAP4(len);
+#else
+                       PUTU32(blocks[i].c+124,len);
+#endif
                        edges[i].blocks = 2;
                }
                edges[i].ptr = blocks[i].c;
@@ -284,6 +352,7 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key,
 
        memset(blocks,0,sizeof(blocks));
        for (i=0;i<x4;i++) {
+#ifdef BSWAP4
                blocks[i].d[0] = BSWAP4(ctx->A[i]);     ctx->A[i] = key->tail.h0;
                blocks[i].d[1] = BSWAP4(ctx->B[i]);     ctx->B[i] = key->tail.h1;
                blocks[i].d[2] = BSWAP4(ctx->C[i]);     ctx->C[i] = key->tail.h2;
@@ -291,6 +360,15 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key,
                blocks[i].d[4] = BSWAP4(ctx->E[i]);     ctx->E[i] = key->tail.h4;
                blocks[i].c[20] = 0x80;
                blocks[i].d[15] = BSWAP4((64+20)*8);
+#else
+               PUTU32(blocks[i].c+0,ctx->A[i]);        ctx->A[i] = key->tail.h0;
+               PUTU32(blocks[i].c+4,ctx->B[i]);        ctx->B[i] = key->tail.h1;
+               PUTU32(blocks[i].c+8,ctx->C[i]);        ctx->C[i] = key->tail.h2;
+               PUTU32(blocks[i].c+12,ctx->D[i]);       ctx->D[i] = key->tail.h3;
+               PUTU32(blocks[i].c+16,ctx->E[i]);       ctx->E[i] = key->tail.h4;
+               blocks[i].c[20] = 0x80;
+               PUTU32(blocks[i].c+60,(64+20)*8);
+#endif
                edges[i].ptr = blocks[i].c;
                edges[i].blocks = 1;
        }
@@ -298,30 +376,21 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key,
        /* finalize MACs */
        sha1_multi_block(ctx,edges,n4x);
 
-       packlen = 5+16+((frag+20+16)&-16);
-
-       out += (packlen<<(1+n4x))-packlen;
-       inp += (frag<<(1+n4x))-frag;
-
-       RAND_bytes((IVs=blocks[0].c),16*x4);    /* ask for IVs in bulk */
-
-       for (i=x4-1;;i--) {
+       for (i=0;i<x4;i++) {
                unsigned int len = (i==(x4-1)?last:frag), pad, j;
                unsigned char *out0 = out;
 
-               out += 5+16;            /* place for header and explicit IV */
-               ciph_d[i].inp = out;
-               ciph_d[i].out = out;
+               memcpy(ciph_d[i].out,ciph_d[i].inp,len-processed);
+               ciph_d[i].inp = ciph_d[i].out;
 
-               memmove(out,inp,len);
-               out += len;
+               out += 5+16+len;
 
                /* write MAC */
-               ((u32 *)out)[0] = BSWAP4(ctx->A[i]);
-               ((u32 *)out)[1] = BSWAP4(ctx->B[i]);
-               ((u32 *)out)[2] = BSWAP4(ctx->C[i]);
-               ((u32 *)out)[3] = BSWAP4(ctx->D[i]);
-               ((u32 *)out)[4] = BSWAP4(ctx->E[i]);
+               PUTU32(out+0,ctx->A[i]);
+               PUTU32(out+4,ctx->B[i]);
+               PUTU32(out+8,ctx->C[i]);
+               PUTU32(out+12,ctx->D[i]);
+               PUTU32(out+16,ctx->E[i]);
                out += 20;
                len += 20;
 
@@ -330,7 +399,7 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key,
                for (j=0;j<=pad;j++) *(out++) = pad;
                len += pad+1;
 
-               ciph_d[i].blocks = len/16;
+               ciph_d[i].blocks = (len-processed)/16;
                len += 16;      /* account for explicit iv */
 
                /* arrange header */
@@ -340,17 +409,8 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key,
                out0[3] = (u8)(len>>8);
                out0[4] = (u8)(len);
 
-               /* explicit iv */
-               memcpy(ciph_d[i].iv, IVs, 16);
-               memcpy(&out0[5],     IVs, 16);
-
                ret += len+5;
-
-               if (i==0) break;
-
-               out = out0-packlen;
-               inp -= frag;
-               IVs += 16;
+               inp += frag;
        }
 
        aesni_multi_cbc_encrypt(ciph_d,&key->ks,n4x);
@@ -435,28 +495,45 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                /* arrange cache line alignment */
                pmac = (void *)(((size_t)mac.c+31)&((size_t)0-32));
 
-               /* decrypt HMAC|padding at once */
-               aesni_cbc_encrypt(in,out,len,
-                               &key->ks,ctx->iv,0);
-
-               if (plen) {     /* "TLS" mode of operation */
+               if (plen != NO_PAYLOAD_LENGTH) {        /* "TLS" mode of operation */
                        size_t inp_len, mask, j, i;
                        unsigned int res, maxpad, pad, bitlen;
                        int ret = 1;
                        union { unsigned int  u[SHA_LBLOCK];
                                unsigned char c[SHA_CBLOCK]; }
                                *data = (void *)key->md.data;
+#if defined(STITCHED_DECRYPT_CALL)
+                       unsigned char tail_iv[AES_BLOCK_SIZE];
+                       int stitch=0;
+#endif
 
                        if ((key->aux.tls_aad[plen-4]<<8|key->aux.tls_aad[plen-3])
-                           >= TLS1_1_VERSION)
-                               iv = AES_BLOCK_SIZE;
-
-                       if (len<(iv+SHA_DIGEST_LENGTH+1))
+                           >= TLS1_1_VERSION) {
+                               if (len<(AES_BLOCK_SIZE+SHA_DIGEST_LENGTH+1))
+                                       return 0;
+
+                               /* omit explicit iv */
+                               memcpy(ctx->iv,in,AES_BLOCK_SIZE);
+                               in  += AES_BLOCK_SIZE;
+                               out += AES_BLOCK_SIZE;
+                               len -= AES_BLOCK_SIZE;
+                       }
+                       else if (len<(SHA_DIGEST_LENGTH+1))
                                return 0;
 
-                       /* omit explicit iv */
-                       out += iv;
-                       len -= iv;
+#if defined(STITCHED_DECRYPT_CALL)
+                       if (len>=1024 && ctx->key_len==32) {
+                               /* decrypt last block */
+                               memcpy(tail_iv,in+len-2*AES_BLOCK_SIZE,AES_BLOCK_SIZE);
+                               aesni_cbc_encrypt(in+len-AES_BLOCK_SIZE,
+                                               out+len-AES_BLOCK_SIZE,AES_BLOCK_SIZE,
+                                               &key->ks,tail_iv,0);
+                               stitch=1;
+                       } else
+#endif
+                       /* decrypt HMAC|padding at once */
+                       aesni_cbc_encrypt(in,out,len,
+                                       &key->ks,ctx->iv,0);
 
                        /* figure out payload length */
                        pad = out[len-1];
@@ -476,6 +553,30 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                        key->md = key->head;
                        SHA1_Update(&key->md,key->aux.tls_aad,plen);
 
+#if defined(STITCHED_DECRYPT_CALL)
+                       if (stitch) {
+                               blocks = (len-(256+32+SHA_CBLOCK))/SHA_CBLOCK;
+                               aes_off = len-AES_BLOCK_SIZE-blocks*SHA_CBLOCK;
+                               sha_off = SHA_CBLOCK-plen;
+
+                               aesni_cbc_encrypt(in,out,aes_off,
+                                       &key->ks,ctx->iv,0);
+
+                               SHA1_Update(&key->md,out,sha_off);
+                               aesni256_cbc_sha1_dec(in+aes_off,
+                                       out+aes_off,blocks,&key->ks,ctx->iv,
+                                       &key->md,out+sha_off);
+
+                               sha_off += blocks*=SHA_CBLOCK;
+                               out += sha_off;
+                               len -= sha_off;
+                               inp_len -= sha_off;
+
+                               key->md.Nl += (blocks<<3);      /* at most 18 bits */
+                               memcpy(ctx->iv,tail_iv,AES_BLOCK_SIZE);
+                       }
+#endif
+
 #if 1
                        len -= SHA_DIGEST_LENGTH;               /* amend mac */
                        if (len>=(256+SHA_CBLOCK)) {
@@ -630,6 +731,34 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
 #endif
                        return ret;
                } else {
+#if defined(STITCHED_DECRYPT_CALL)
+                       if (len>=1024 && ctx->key_len==32) {
+                               if (sha_off%=SHA_CBLOCK)
+                                       blocks = (len-3*SHA_CBLOCK)/SHA_CBLOCK;
+                               else
+                                       blocks = (len-2*SHA_CBLOCK)/SHA_CBLOCK;
+                               aes_off = len-blocks*SHA_CBLOCK;
+
+                               aesni_cbc_encrypt(in,out,aes_off,
+                                       &key->ks,ctx->iv,0);
+                               SHA1_Update(&key->md,out,sha_off);
+                               aesni256_cbc_sha1_dec(in+aes_off,
+                                       out+aes_off,blocks,&key->ks,ctx->iv,
+                                       &key->md,out+sha_off);
+
+                               sha_off += blocks*=SHA_CBLOCK;
+                               out += sha_off;
+                               len -= sha_off;
+
+                               key->md.Nh += blocks>>29;
+                               key->md.Nl += blocks<<=3;
+                               if (key->md.Nl<(unsigned int)blocks) key->md.Nh++;
+                       } else
+#endif
+                       /* decrypt HMAC|padding at once */
+                       aesni_cbc_encrypt(in,out,len,
+                                       &key->ks,ctx->iv,0);
+
                        SHA1_Update(&key->md,out,len);
                }
        }
@@ -701,6 +830,8 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void
                        }
                }
 #if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
+       case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE:
+               return (int)(5+16+((arg+20+16)&-16));
        case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD:
                {
                EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
@@ -708,7 +839,7 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void
                unsigned int n4x=1, x4;
                unsigned int frag, last, packlen, inp_len;
 
-               if (arg<sizeof(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM)) return -1;
+               if (arg<(int)sizeof(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM)) return -1;
 
                inp_len = param->inp[11]<<8|param->inp[12];
 
@@ -717,9 +848,17 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void
                        if ((param->inp[9]<<8|param->inp[10]) < TLS1_1_VERSION)
                                return -1;
 
-                       if (inp_len<4096) return 0;     /* too short */
+                       if (inp_len)
+                               {
+                               if (inp_len<4096) return 0;     /* too short */
 
-                       if (OPENSSL_ia32cap_P[2]&(1<<5)) n4x=2; /* AVX2 */
+                               if (inp_len>=8192 && OPENSSL_ia32cap_P[2]&(1<<5))
+                                       n4x=2;  /* AVX2 */
+                               }
+                       else if ((n4x=param->interleave/4) && n4x<=2)
+                               inp_len = param->len;
+                       else
+                               return -1;
 
                        key->md = key->head;
                        SHA1_Update(&key->md,param->inp,13);