X-Git-Url: https://git.openssl.org/gitweb/?a=blobdiff_plain;f=crypto%2Fevp%2Fe_aes_cbc_hmac_sha256.c;h=e3156039c618c0e454ad9c1815f16b5e2a7cf73f;hb=2893a302a9b6a70161d1859d985a52af11b2195d;hp=e1a21b39ee2f04c1cbbc97fc0a42a3b2f27e81d9;hpb=b4f0abd246340b90bb3fa2646814729f0e9d049e;p=openssl.git diff --git a/crypto/evp/e_aes_cbc_hmac_sha256.c b/crypto/evp/e_aes_cbc_hmac_sha256.c index e1a21b39ee..e3156039c6 100644 --- a/crypto/evp/e_aes_cbc_hmac_sha256.c +++ b/crypto/evp/e_aes_cbc_hmac_sha256.c @@ -201,9 +201,15 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key, u32 d[32]; u8 c[128]; } blocks[8]; SHA256_MB_CTX *ctx; - unsigned int frag, last, packlen, i, x4=4*n4x; + unsigned int frag, last, packlen, i, x4=4*n4x, minblocks, processed=0; size_t ret = 0; u8 *IVs; +#if defined(BSWAP8) + u64 seqnum; +#endif + + if (RAND_bytes((IVs=blocks[0].c),16*x4)<=0) /* ask for IVs in bulk */ + return 0; ctx = (SHA256_MB_CTX *)(storage+32-((size_t)storage%32)); /* align */ @@ -214,11 +220,31 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key, last -= x4-1; } + packlen = 5+16+((frag+32+16)&-16); + + /* populate descriptors with pointers and IVs */ hash_d[0].ptr = inp; - for (i=1;imd.data,8); + seqnum = BSWAP8(blocks[0].q[0]); +#endif for (i=0;iA[i] = key->md.h[0]; ctx->B[i] = key->md.h[1]; @@ -231,15 +257,11 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key, /* fix seqnum */ #if defined(BSWAP8) - blocks[i].q[0] = BSWAP8(BSWAP8(*(u64*)key->md.data)+i); + blocks[i].q[0] = BSWAP8(seqnum+i); #else - blocks[i].c[7] += ((u8*)key->md.data)[7]+i; - if (blocks[i].c[7] < i) { - int j; - - for (j=6;j>=0;j--) { - if (blocks[i].c[j]=((u8*)key->md.data)[j]+1) break; - } + for (carry=i,j=8;j--;) { + blocks[i].c[j] = ((u8*)key->md.data)[j]+carry; + carry = (blocks[i].c[j]-carry)>>(sizeof(carry)*8-1); } #endif blocks[i].c[8] = ((u8*)key->md.data)[8]; @@ -260,6 +282,39 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key, /* hash 13-byte headers and first 64-13 bytes of inputs */ sha256_multi_block(ctx,edges,n4x); /* hash bulk inputs */ +#define MAXCHUNKSIZE 2048 +#if MAXCHUNKSIZE%64 +#error "MAXCHUNKSIZE is not divisible by 64" +#elif MAXCHUNKSIZE + /* goal is to minimize pressure on L1 cache by moving + * in shorter steps, so that hashed data is still in + * the cache by the time we encrypt it */ + minblocks = ((frag<=last ? frag : last)-(64-13))/64; + if (minblocks>MAXCHUNKSIZE/64) { + for (i=0;iks,n4x); + + for (i=0;iMAXCHUNKSIZE/64); + } +#endif +#undef MAXCHUNKSIZE sha256_multi_block(ctx,hash_d,n4x); memset(blocks,0,sizeof(blocks)); @@ -268,16 +323,24 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key, off = hash_d[i].blocks*64; const unsigned char *ptr = hash_d[i].ptr+off; - off = len-(64-13)-off; /* remainder actually */ + off = (len-processed)-(64-13)-off; /* remainder actually */ memcpy(blocks[i].c,ptr,off); blocks[i].c[off]=0x80; len += 64+13; /* 64 is HMAC header */ len *= 8; /* convert to bits */ if (off<(64-8)) { +#ifdef BSWAP4 blocks[i].d[15] = BSWAP4(len); +#else + PUTU32(blocks[i].c+60,len); +#endif edges[i].blocks = 1; } else { +#ifdef BSWAP4 blocks[i].d[31] = BSWAP4(len); +#else + PUTU32(blocks[i].c+124,len); +#endif edges[i].blocks = 2; } edges[i].ptr = blocks[i].c; @@ -288,6 +351,7 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key, memset(blocks,0,sizeof(blocks)); for (i=0;iA[i]); ctx->A[i] = key->tail.h[0]; blocks[i].d[1] = BSWAP4(ctx->B[i]); ctx->B[i] = key->tail.h[1]; blocks[i].d[2] = BSWAP4(ctx->C[i]); ctx->C[i] = key->tail.h[2]; @@ -298,6 +362,18 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key, blocks[i].d[7] = BSWAP4(ctx->H[i]); ctx->H[i] = key->tail.h[7]; blocks[i].c[32] = 0x80; blocks[i].d[15] = BSWAP4((64+32)*8); +#else + PUTU32(blocks[i].c+0,ctx->A[i]); ctx->A[i] = key->tail.h[0]; + PUTU32(blocks[i].c+4,ctx->B[i]); ctx->B[i] = key->tail.h[1]; + PUTU32(blocks[i].c+8,ctx->C[i]); ctx->C[i] = key->tail.h[2]; + PUTU32(blocks[i].c+12,ctx->D[i]); ctx->D[i] = key->tail.h[3]; + PUTU32(blocks[i].c+16,ctx->E[i]); ctx->E[i] = key->tail.h[4]; + PUTU32(blocks[i].c+20,ctx->F[i]); ctx->F[i] = key->tail.h[5]; + PUTU32(blocks[i].c+24,ctx->G[i]); ctx->G[i] = key->tail.h[6]; + PUTU32(blocks[i].c+28,ctx->H[i]); ctx->H[i] = key->tail.h[7]; + blocks[i].c[32] = 0x80; + PUTU32(blocks[i].c+60,(64+32)*8); +#endif edges[i].ptr = blocks[i].c; edges[i].blocks = 1; } @@ -305,33 +381,24 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key, /* finalize MACs */ sha256_multi_block(ctx,edges,n4x); - packlen = 5+16+((frag+32+16)&-16); - - out += (packlen<<(1+n4x))-packlen; - inp += (frag<<(1+n4x))-frag; - - RAND_bytes((IVs=blocks[0].c),16*x4); /* ask for IVs in bulk */ - - for (i=x4-1;;i--) { + for (i=0;iA[i]); - ((u32 *)out)[1] = BSWAP4(ctx->B[i]); - ((u32 *)out)[2] = BSWAP4(ctx->C[i]); - ((u32 *)out)[3] = BSWAP4(ctx->D[i]); - ((u32 *)out)[4] = BSWAP4(ctx->E[i]); - ((u32 *)out)[5] = BSWAP4(ctx->F[i]); - ((u32 *)out)[6] = BSWAP4(ctx->G[i]); - ((u32 *)out)[7] = BSWAP4(ctx->H[i]); + PUTU32(out+0,ctx->A[i]); + PUTU32(out+4,ctx->B[i]); + PUTU32(out+8,ctx->C[i]); + PUTU32(out+12,ctx->D[i]); + PUTU32(out+16,ctx->E[i]); + PUTU32(out+20,ctx->F[i]); + PUTU32(out+24,ctx->G[i]); + PUTU32(out+28,ctx->H[i]); out += 32; len += 32; @@ -340,7 +407,7 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key, for (j=0;j<=pad;j++) *(out++) = pad; len += pad+1; - ciph_d[i].blocks = len/16; + ciph_d[i].blocks = (len-processed)/16; len += 16; /* account for explicit iv */ /* arrange header */ @@ -350,17 +417,8 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key, out0[3] = (u8)(len>>8); out0[4] = (u8)(len); - /* explicit iv */ - memcpy(ciph_d[i].iv, IVs, 16); - memcpy(&out0[5], IVs, 16); - ret += len+5; - - if (i==0) break; - - out = out0-packlen; - inp -= frag; - IVs += 16; + inp += frag; } aesni_multi_cbc_encrypt(ciph_d,&key->ks,n4x); @@ -400,7 +458,7 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, iv = AES_BLOCK_SIZE; #if defined(STITCHED_CALL) - if (OPENSSL_ia32cap_P[1]&(1<<(60-32)) && + if (OPENSSL_ia32cap_P[1]&(1<<(60-32)) && /* AVX? */ plen>(sha_off+iv) && (blocks=(plen-(sha_off+iv))/SHA256_CBLOCK)) { SHA256_Update(&key->md,in+iv,sha_off); @@ -451,7 +509,7 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, aesni_cbc_encrypt(in,out,len, &key->ks,ctx->iv,0); - if (plen) { /* "TLS" mode of operation */ + if (plen != NO_PAYLOAD_LENGTH) { /* "TLS" mode of operation */ size_t inp_len, mask, j, i; unsigned int res, maxpad, pad, bitlen; int ret = 1; @@ -667,6 +725,7 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void *ptr) { EVP_AES_HMAC_SHA256 *key = data(ctx); + unsigned int u_arg = (unsigned int)arg; switch (type) { @@ -677,7 +736,10 @@ static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, vo memset (hmac_key,0,sizeof(hmac_key)); - if (arg > (int)sizeof(hmac_key)) { + if (arg < 0) + return -1; + + if (u_arg > sizeof(hmac_key)) { SHA256_Init(&key->head); SHA256_Update(&key->head,ptr,arg); SHA256_Final(hmac_key,&key->head); @@ -728,6 +790,8 @@ static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, vo } } #if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK + case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE: + return (int)(5+16+((arg+32+16)&-16)); case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD: { EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param = @@ -735,7 +799,10 @@ static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, vo unsigned int n4x=1, x4; unsigned int frag, last, packlen, inp_len; - if (arginp[11]<<8|param->inp[12]; @@ -744,10 +811,17 @@ static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, vo if ((param->inp[9]<<8|param->inp[10]) < TLS1_1_VERSION) return -1; - if (inp_len<4096) return 0; /* too short */ + if (inp_len) + { + if (inp_len<4096) return 0; /* too short */ - if (inp_len>=8192 && OPENSSL_ia32cap_P[2]&(1<<5)) - n4x=2; /* AVX2 */ + if (inp_len>=8192 && OPENSSL_ia32cap_P[2]&(1<<5)) + n4x=2; /* AVX2 */ + } + else if ((n4x=param->interleave/4) && n4x<=2) + inp_len = param->len; + else + return -1; key->md = key->head; SHA256_Update(&key->md,param->inp,13);