X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fevp%2Fe_aes_cbc_hmac_sha256.c;h=e3156039c618c0e454ad9c1815f16b5e2a7cf73f;hp=7d03abc1a68f46c5883e93a6be0a11938ea92ac7;hb=2893a302a9b6a70161d1859d985a52af11b2195d;hpb=7f893258f6e9298a24fbfced5331e4867df4fd6c

diff --git a/crypto/evp/e_aes_cbc_hmac_sha256.c b/crypto/evp/e_aes_cbc_hmac_sha256.c
index 7d03abc1a6..e3156039c6 100644
--- a/crypto/evp/e_aes_cbc_hmac_sha256.c
+++ b/crypto/evp/e_aes_cbc_hmac_sha256.c
@@ -71,8 +71,8 @@
 #define EVP_CIPH_FLAG_DEFAULT_ASN1 0
 #endif
 
-#if !defined(EVP_CIPH_FLAG_TLS11_MULTI_BLOCK)
-#define EVP_CIPH_FLAG_TLS11_MULTI_BLOCK 0
+#if !defined(EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK)
+#define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0
 #endif
 
 #define TLS1_1_VERSION 0x0302
@@ -178,7 +178,7 @@ static void sha256_update(SHA256_CTX *c,const void *data,size_t len)
 #endif
 #define SHA256_Update sha256_update
 
-#if EVP_CIPH_FLAG_TLS11_MULTI_BLOCK
+#if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
 
 typedef struct { unsigned int A[8],B[8],C[8],D[8],E[8],F[8],G[8],H[8]; } SHA256_MB_CTX;
 typedef struct { const unsigned char *ptr; int blocks; } HASH_DESC;
@@ -186,11 +186,11 @@ typedef struct { const unsigned char *ptr; int blocks; } HASH_DESC;
 void sha256_multi_block(SHA256_MB_CTX *,const HASH_DESC *,int);
 
 typedef struct { const unsigned char *inp; unsigned char *out;
-                 int blocks; double iv[2]; } CIPH_DESC;
+                 int blocks; u64 iv[2]; } CIPH_DESC;
 
 void aesni_multi_cbc_encrypt(CIPH_DESC *,void *,int);
 
-static size_t tls11_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key,
+static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key,
     unsigned char *out, const unsigned char *inp, size_t inp_len,
     int n4x)    /* n4x is 1 or 2 */
 {
@@ -201,8 +201,15 @@ static size_t tls11_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key,
         u32 d[32];
         u8  c[128]; } blocks[8];
     SHA256_MB_CTX *ctx;
-    unsigned int frag, last, packlen, i, x4=4*n4x;
+    unsigned int frag, last, packlen, i, x4=4*n4x, minblocks, processed=0;
     size_t ret = 0;
+    u8 *IVs;
+#if defined(BSWAP8)
+    u64 seqnum;
+#endif
+
+    if (RAND_bytes((IVs=blocks[0].c),16*x4)<=0) /* ask for IVs in bulk */
+        return 0;
 
     ctx = (SHA256_MB_CTX *)(storage+32-((size_t)storage%32));  /* align */
 
@@ -213,11 +220,31 @@ static size_t tls11_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key,
         last -= x4-1;
     }
 
+    packlen = 5+16+((frag+32+16)&-16);
+
+    /* populate descriptors with pointers and IVs */
     hash_d[0].ptr = inp;
-    for (i=1;i<x4;i++)  hash_d[i].ptr = hash_d[i-1].ptr+frag;
+    ciph_d[0].inp = inp;
+    ciph_d[0].out = out+5+16;   /* 5+16 is place for header and explicit IV */
+    memcpy(ciph_d[0].out-16,IVs,16);
+    memcpy(ciph_d[0].iv,IVs,16);    IVs += 16;
+
+    for (i=1;i<x4;i++) {
+        ciph_d[i].inp = hash_d[i].ptr = hash_d[i-1].ptr+frag;
+        ciph_d[i].out = ciph_d[i-1].out+packlen;
+        memcpy(ciph_d[i].out-16,IVs,16);
+        memcpy(ciph_d[i].iv,IVs,16);    IVs += 16;
+    }
 
+#if defined(BSWAP8)
+    memcpy(blocks[0].c,key->md.data,8);
+    seqnum = BSWAP8(blocks[0].q[0]);
+#endif
     for (i=0;i<x4;i++) {
         unsigned int len = (i==(x4-1)?last:frag);
+#if !defined(BSWAP8)
+        unsigned int carry, j;
+#endif
 
         ctx->A[i] = key->md.h[0];
         ctx->B[i] = key->md.h[1];
@@ -230,23 +257,19 @@ static size_t tls11_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key,
 
         /* fix seqnum */
 #if defined(BSWAP8)
-        blocks[i].q[0] = BSWAP8(BSWAP8(*(u64*)key->md.data)+i);
+        blocks[i].q[0] = BSWAP8(seqnum+i);
 #else
-        blocks[i].c[7] += key->md.data[7]+i;
-        if (blocks[i].c[7] < i) {
-            int j;
-
-            for (j=6;j>=0;j--) {
-                if (blocks[i].c[j]=key->md.data[j]+1) break;
-            }
+        for (carry=i,j=8;j--;) {
+            blocks[i].c[j] = ((u8*)key->md.data)[j]+carry;
+            carry = (blocks[i].c[j]-carry)>>(sizeof(carry)*8-1);
         }
 #endif
-        blocks[i].c[8] = key->md.data[8];
-        blocks[i].c[9] = key->md.data[9];
-        blocks[i].c[10] = key->md.data[10];
+        blocks[i].c[8] = ((u8*)key->md.data)[8];
+        blocks[i].c[9] = ((u8*)key->md.data)[9];
+        blocks[i].c[10] = ((u8*)key->md.data)[10];
         /* fix length */
-        blocks[i].c[11] = (unsigned char)(len>>8);
-        blocks[i].c[12] = (unsigned char)(len);
+        blocks[i].c[11] = (u8)(len>>8);
+        blocks[i].c[12] = (u8)(len);
 
         memcpy(blocks[i].c+13,hash_d[i].ptr,64-13);
         hash_d[i].ptr += 64-13;
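The portable (#else) branch of the "fix seqnum" hunk above adds the record index to the 8-byte big-endian TLS sequence number one byte at a time, deriving each carry from the sign bit of an unsigned subtraction instead of branching. A standalone illustration of the same trick, with a hypothetical helper name that is not part of this file, assuming 8-bit chars and a 32-bit unsigned int:

    #include <stdio.h>

    /* Add a small increment (the record index, < 256) to an 8-byte
     * big-endian counter, propagating the carry byte by byte: after the
     * addition, dst[j] < carry exactly when the byte wrapped, so the
     * sign bit of the unsigned difference dst[j]-carry is the next carry. */
    static void be64_add_small(unsigned char dst[8], const unsigned char src[8],
                               unsigned int inc)
    {
        unsigned int carry = inc, j;

        for (j = 8; j--;) {
            dst[j] = (unsigned char)(src[j] + carry);
            carry = (unsigned int)(dst[j] - carry) >> (sizeof(carry) * 8 - 1);
        }
    }

    int main(void)
    {
        static const unsigned char seq[8] = {0, 0, 0, 0, 0, 0x01, 0xff, 0xff};
        unsigned char next[8];
        unsigned int j;

        be64_add_small(next, seq, 3);       /* expect ...00 02 00 02 */
        for (j = 0; j < 8; j++)
            printf("%02x", next[j]);
        printf("\n");
        return 0;
    }

This is only a sketch of the idea; the patch itself works in place on blocks[i].c and takes the source bytes from key->md.data.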
@@ -256,7 +279,42 @@ static size_t tls11_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key,
         edges[i].blocks = 1;
     }
 
+    /* hash 13-byte headers and first 64-13 bytes of inputs */
     sha256_multi_block(ctx,edges,n4x);
+    /* hash bulk inputs */
+#define MAXCHUNKSIZE    2048
+#if     MAXCHUNKSIZE%64
+#error  "MAXCHUNKSIZE is not divisible by 64"
+#elif   MAXCHUNKSIZE
+    /* goal is to minimize pressure on L1 cache by moving
+     * in shorter steps, so that hashed data is still in
+     * the cache by the time we encrypt it */
+    minblocks = ((frag<=last ? frag : last)-(64-13))/64;
+    if (minblocks>MAXCHUNKSIZE/64) {
+        for (i=0;i<x4;i++) {
+            edges[i].ptr     = hash_d[i].ptr;
+            edges[i].blocks  = MAXCHUNKSIZE/64;
+            ciph_d[i].blocks = MAXCHUNKSIZE/16;
+        }
+        do {
+            sha256_multi_block(ctx,edges,n4x);
+            aesni_multi_cbc_encrypt(ciph_d,&key->ks,n4x);
+
+            for (i=0;i<x4;i++) {
+                edges[i].ptr      = hash_d[i].ptr += MAXCHUNKSIZE;
+                hash_d[i].blocks -= MAXCHUNKSIZE/64;
+                edges[i].blocks   = MAXCHUNKSIZE/64;
+                ciph_d[i].inp    += MAXCHUNKSIZE;
+                ciph_d[i].out    += MAXCHUNKSIZE;
+                ciph_d[i].blocks  = MAXCHUNKSIZE/16;
+                memcpy(ciph_d[i].iv,ciph_d[i].out-16,16);
+            }
+            processed += MAXCHUNKSIZE;
+            minblocks -= MAXCHUNKSIZE/64;
+        } while (minblocks>MAXCHUNKSIZE/64);
+    }
+#endif
+#undef MAXCHUNKSIZE
     sha256_multi_block(ctx,hash_d,n4x);
 
     memset(blocks,0,sizeof(blocks));
@@ -265,25 +323,35 @@ static size_t tls11_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key,
                      off = hash_d[i].blocks*64;
         const unsigned char *ptr = hash_d[i].ptr+off;
 
-        off = len-(64-13)-off;  /* remainder actually */
+        off = (len-processed)-(64-13)-off;  /* remainder actually */
         memcpy(blocks[i].c,ptr,off);
         blocks[i].c[off]=0x80;
         len += 64+13;       /* 64 is HMAC header */
         len *= 8;           /* convert to bits */
         if (off<(64-8)) {
+#ifdef BSWAP4
             blocks[i].d[15] = BSWAP4(len);
+#else
+            PUTU32(blocks[i].c+60,len);
+#endif
             edges[i].blocks = 1;
         }
         else {
+#ifdef BSWAP4
             blocks[i].d[31] = BSWAP4(len);
+#else
+            PUTU32(blocks[i].c+124,len);
+#endif
             edges[i].blocks = 2;
         }
         edges[i].ptr = blocks[i].c;
     }
 
+    /* hash input tails and finalize */
     sha256_multi_block(ctx,edges,n4x);
 
     memset(blocks,0,sizeof(blocks));
     for (i=0;i<x4;i++) {
+#ifdef BSWAP4
         blocks[i].d[0] = BSWAP4(ctx->A[i]); ctx->A[i] = key->tail.h[0];
         blocks[i].d[1] = BSWAP4(ctx->B[i]); ctx->B[i] = key->tail.h[1];
         blocks[i].d[2] = BSWAP4(ctx->C[i]); ctx->C[i] = key->tail.h[2];
@@ -294,66 +362,70 @@ static size_t tls11_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key,
         blocks[i].d[7] = BSWAP4(ctx->H[i]); ctx->H[i] = key->tail.h[7];
         blocks[i].c[32] = 0x80;
         blocks[i].d[15] = BSWAP4((64+32)*8);
+#else
+        PUTU32(blocks[i].c+0,ctx->A[i]);    ctx->A[i] = key->tail.h[0];
+        PUTU32(blocks[i].c+4,ctx->B[i]);    ctx->B[i] = key->tail.h[1];
+        PUTU32(blocks[i].c+8,ctx->C[i]);    ctx->C[i] = key->tail.h[2];
+        PUTU32(blocks[i].c+12,ctx->D[i]);   ctx->D[i] = key->tail.h[3];
+        PUTU32(blocks[i].c+16,ctx->E[i]);   ctx->E[i] = key->tail.h[4];
+        PUTU32(blocks[i].c+20,ctx->F[i]);   ctx->F[i] = key->tail.h[5];
+        PUTU32(blocks[i].c+24,ctx->G[i]);   ctx->G[i] = key->tail.h[6];
+        PUTU32(blocks[i].c+28,ctx->H[i]);   ctx->H[i] = key->tail.h[7];
+        blocks[i].c[32] = 0x80;
+        PUTU32(blocks[i].c+60,(64+32)*8);
+#endif
         edges[i].ptr = blocks[i].c;
         edges[i].blocks = 1;
     }
 
+    /* finalize MACs */
     sha256_multi_block(ctx,edges,n4x);
 
-    packlen = 5+16+((frag+32+16)&-16);
-
-    out += (packlen<<(1+n4x))-packlen;
-    inp += (frag<<(1+n4x))-frag;
-
-    for (i=x4-1;;i--) {
+    for (i=0;i<x4;i++) {
         unsigned int len = (i==(x4-1)?last:frag), pad, j;
         unsigned char *out0 = out;
 
-        ((u32 *)out)[0] = BSWAP4(ctx->A[i]);
-        ((u32 *)out)[1] = BSWAP4(ctx->B[i]);
-        ((u32 *)out)[2] = BSWAP4(ctx->C[i]);
-        ((u32 *)out)[3] = BSWAP4(ctx->D[i]);
-        ((u32 *)out)[4] = BSWAP4(ctx->E[i]);
-        ((u32 *)out)[5] = BSWAP4(ctx->F[i]);
-        ((u32 *)out)[6] = BSWAP4(ctx->G[i]);
-        ((u32 *)out)[7] = BSWAP4(ctx->H[i]);
+        memcpy(ciph_d[i].out,ciph_d[i].inp,len-processed);
+        ciph_d[i].inp = ciph_d[i].out;
+
+        out += 5+16+len;
+
+        /* write MAC */
+        PUTU32(out+0,ctx->A[i]);
+        PUTU32(out+4,ctx->B[i]);
+        PUTU32(out+8,ctx->C[i]);
+        PUTU32(out+12,ctx->D[i]);
+        PUTU32(out+16,ctx->E[i]);
+        PUTU32(out+20,ctx->F[i]);
+        PUTU32(out+24,ctx->G[i]);
+        PUTU32(out+28,ctx->H[i]);
         out += 32;
-        len += 32+16;
+        len += 32;
 
+        /* pad */
         pad = 15-len%16;
         for (j=0;j<=pad;j++) *(out++) = pad;
        len += pad+1;
 
-        ciph_d[i].blocks = len/16;
+        ciph_d[i].blocks = (len-processed)/16;
+        len += 16;  /* account for explicit iv */
 
         /* arrange header */
-        out0[0] = key->md.data[8];
-        out0[1] = key->md.data[9];
-        out0[2] = key->md.data[10];
-        out0[3] = (unsigned char)(len>>8);
-        out0[4] = (unsigned char)(len);
-
-        /* explicit iv */
-        RAND_bytes((u8 *)ciph_d[i].iv, 16);
-        memcpy(&out[5], ciph_d[i].iv, 16);
+        out0[0] = ((u8*)key->md.data)[8];
+        out0[1] = ((u8*)key->md.data)[9];
+        out0[2] = ((u8*)key->md.data)[10];
+        out0[3] = (u8)(len>>8);
+        out0[4] = (u8)(len);
 
         ret += len+5;
-
-        if (i==0) break;
-
-        out = out0-packlen;
+        inp += frag;
     }
 
     aesni_multi_cbc_encrypt(ciph_d,&key->ks,n4x);
 
+    OPENSSL_cleanse(blocks,sizeof(blocks));
+    OPENSSL_cleanse(ctx,sizeof(*ctx));
+
     return ret;
 }
 #endif
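Each of the x4 records assembled above is laid out as a 5-byte TLS header, a 16-byte explicit IV, the fragment, a 32-byte SHA-256 MAC and CBC padding, which is why the per-record stride is computed as 5+16+((frag+32+16)&-16) both here and in the AAD control further down. A small self-contained check of that arithmetic, with hypothetical function names, is given below purely as an illustration:

    #include <stdio.h>

    /* Closed-form per-record stride used by the patch: header, explicit
     * IV, then payload+MAC+padding rounded up to the AES block size. */
    static unsigned int record_stride(unsigned int frag)
    {
        return 5 + 16 + ((frag + 32 + 16) & -16);
    }

    /* The same size computed step by step, mirroring the tail of the
     * encrypt loop above: append the 32-byte MAC, pad with pad+1 bytes
     * where pad = 15 - len%16, then count the IV and the header. */
    static unsigned int record_step_by_step(unsigned int frag)
    {
        unsigned int len = frag + 32;       /* payload + SHA-256 MAC */
        unsigned int pad = 15 - len % 16;

        len += pad + 1;                     /* CBC padding incl. length byte */
        len += 16;                          /* explicit IV */
        return len + 5;                     /* TLS record header */
    }

    int main(void)
    {
        unsigned int frag;

        for (frag = 1443; frag <= 1447; frag++)
            printf("frag=%u stride=%u step-by-step=%u\n",
                   frag, record_stride(frag), record_step_by_step(frag));
        return 0;
    }

Because at least one padding byte is always emitted, the two computations agree for every fragment length, so the "&-16" expression is an exact record size, not merely an upper bound.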
@@ -386,7 +458,7 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
         iv = AES_BLOCK_SIZE;
 
 #if defined(STITCHED_CALL)
-    if (OPENSSL_ia32cap_P[1]&(1<<(60-32)) &&
+    if (OPENSSL_ia32cap_P[1]&(1<<(60-32)) &&    /* AVX? */
         plen>(sha_off+iv) &&
         (blocks=(plen-(sha_off+iv))/SHA256_CBLOCK)) {
         SHA256_Update(&key->md,in+iv,sha_off);
@@ -437,7 +509,7 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
         aesni_cbc_encrypt(in,out,len,
                     &key->ks,ctx->iv,0);
 
-        if (plen) { /* "TLS" mode of operation */
+        if (plen != NO_PAYLOAD_LENGTH) {    /* "TLS" mode of operation */
             size_t inp_len, mask, j, i;
             unsigned int res, maxpad, pad, bitlen;
             int ret = 1;
@@ -653,6 +725,7 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
 static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void *ptr)
 {
     EVP_AES_HMAC_SHA256 *key = data(ctx);
+    unsigned int u_arg = (unsigned int)arg;
 
     switch (type)
     {
@@ -663,7 +736,10 @@ static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, vo
 
         memset (hmac_key,0,sizeof(hmac_key));
 
-        if (arg > (int)sizeof(hmac_key)) {
+        if (arg < 0)
+            return -1;
+
+        if (u_arg > sizeof(hmac_key)) {
             SHA256_Init(&key->head);
             SHA256_Update(&key->head,ptr,arg);
             SHA256_Final(hmac_key,&key->head);
@@ -713,15 +789,20 @@ static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, vo
             return SHA256_DIGEST_LENGTH;
         }
     }
-#if EVP_EVP_CIPH_FLAG_TLS11_MULTI_BLOCK
-    case EVP_CTRL_TLS11_MULTI_BLOCK_AAD:
+#if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE:
+        return (int)(5+16+((arg+32+16)&-16));
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD:
         {
-        EVP_CTRL_TLS11_MULTI_BLOCK_PARAM *param =
-            (EVP_CTRL_TLS11_MULTI_BLOCK_PARAM *)ptr;
+        EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
+            (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *)ptr;
         unsigned int n4x=1, x4;
         unsigned int frag, last, packlen, inp_len;
 
-        if (arg<13) return -1;
+        if (arg<(int)sizeof(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM)) return -1;
 
         inp_len = param->inp[11]<<8|param->inp[12];
@@ -729,10 +810,18 @@ static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, vo
             {
             if ((param->inp[9]<<8|param->inp[10]) < TLS1_1_VERSION)
                 return -1;
-
-            if (inp_len<2048) return -1;    /* too short */
-
-            if (inp_len>=6144) n4x=2;
+            if (inp_len)
+                {
+                if (inp_len<4096) return 0; /* too short */
+
+                if (inp_len>=8192 && OPENSSL_ia32cap_P[2]&(1<<5))
+                    n4x=2;  /* AVX2 */
+                }
+            else if ((n4x=param->interleave/4) && n4x<=2)
+                inp_len = param->len;
+            else
+                return -1;
 
             key->md = key->head;
             SHA256_Update(&key->md,param->inp,13);
@@ -747,7 +836,7 @@ static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, vo
             }
 
             packlen = 5+16+((frag+32+16)&-16);
-            packlen = (packlen<<(1+n4x))-packlen;
+            packlen = (packlen<<(1+n4x))-packlen+5+16+((last+32+16)&-16);
 
             param->interleave = x4;
@@ -757,15 +846,15 @@ static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, vo
         else
            return -1;  /* not yet */
        }
-    case EVP_CTRL_TLS11_MULTI_BLOCK_ENCRYPT:
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT:
        {
-       EVP_CTRL_TLS11_MULTI_BLOCK_PARAM *param =
-           (EVP_CTRL_TLS11_MULTI_BLOCK_PARAM *)ptr;
+       EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
+           (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *)ptr;
 
-       return tls11_multi_block_encrypt(key,param->out,param->inp,
+       return (int)tls1_1_multi_block_encrypt(key,param->out,param->inp,
                        param->len,param->interleave/4);
        }
-    case EVP_CTRL_TLS11_MULTI_BLOCK_DECRYPT:
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT:
 #endif
 
     default:
        return -1;
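The three EVP_CTRL_TLS1_1_MULTIBLOCK_* controls handled above are meant to be driven in two steps: an AAD call that validates the record version, picks the 4x or 8x interleave and reports the required output size, followed by an ENCRYPT call that emits all records in one stitched AES/SHA-256 pass. The sketch below only illustrates that calling sequence under stated assumptions (an EVP_CIPHER_CTX already keyed for EVP_aes_128_cbc_hmac_sha256(), with the MAC key set, and a 13-byte AAD whose bytes 11..12 carry the total plaintext length); it is not the record-layer code that ships with OpenSSL:

    #include <string.h>
    #include <openssl/evp.h>

    /* Illustrative two-step use of the multi-block controls: returns the
     * number of output bytes written, or 0 when the multi-block path is
     * not applicable and the caller should fall back to ordinary
     * per-record encryption. */
    static size_t multiblock_encrypt(EVP_CIPHER_CTX *ctx,
                                     const unsigned char aad[13],
                                     const unsigned char *inp, size_t len,
                                     unsigned char *out, size_t out_size)
    {
        EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM mb;
        int packlen, outlen;

        memset(&mb, 0, sizeof(mb));
        mb.inp = aad;            /* 13-byte pseudo-header of the first record */
        mb.len = len;            /* total plaintext to split across records */

        /* step 1: choose interleave and learn the needed output room */
        packlen = EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_TLS1_1_MULTIBLOCK_AAD,
                                      (int)sizeof(mb), &mb);
        if (packlen <= 0 || (size_t)packlen > out_size)
            return 0;

        /* step 2: emit mb.interleave records (header, explicit IV,
         * payload, MAC, padding) in a single call */
        mb.inp = inp;
        mb.out = out;
        mb.len = len;
        outlen = EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT,
                                     (int)sizeof(mb), &mb);

        return outlen > 0 ? (size_t)outlen : 0;
    }

Because the cipher fixes up the sequence number internally for each of the interleaved records, a caller along these lines would still have to advance its own record-layer sequence number by mb.interleave afterwards.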
@@ -781,7 +870,7 @@ static EVP_CIPHER aesni_128_cbc_hmac_sha256_cipher =
 #endif
     16,16,16,
     EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|
-    EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS11_MULTI_BLOCK,
+    EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
     aesni_cbc_hmac_sha256_init_key,
     aesni_cbc_hmac_sha256_cipher,
     NULL,
@@ -801,7 +890,7 @@ static EVP_CIPHER aesni_256_cbc_hmac_sha256_cipher =
 #endif
     16,32,16,
     EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|
-    EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS11_MULTI_BLOCK,
+    EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
     aesni_cbc_hmac_sha256_init_key,
     aesni_cbc_hmac_sha256_cipher,
     NULL,
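Since both cipher definitions above now advertise EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK, a caller can probe for the feature before attempting the multi-block control sequence. A minimal sketch, assuming a libcrypto whose public evp.h defines the flag and the controls:

    #include <stdio.h>
    #include <openssl/evp.h>

    int main(void)
    {
        /* EVP_aes_128_cbc_hmac_sha256() returns NULL when AES-NI is absent */
        const EVP_CIPHER *c = EVP_aes_128_cbc_hmac_sha256();

        if (c == NULL) {
            printf("AES-128-CBC-HMAC-SHA256 not available on this CPU\n");
            return 1;
        }

        if (EVP_CIPHER_flags(c) & EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK)
            printf("TLS 1.1+ multi-block encrypt supported\n");
        else
            printf("TLS 1.1+ multi-block encrypt not supported\n");

        return 0;
    }

Note that the "#define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0" fallback at the top of the patched file only covers this translation unit; an application building against headers without the flag would need a similar guard of its own.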