X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fevp%2Fe_aes_cbc_hmac_sha1.c;h=ec76393576800637b0035dd056cf821fd8f5a412;hp=583fe8cc488206393b4366762e6b6d649c2f5a40;hb=2893a302a9b6a70161d1859d985a52af11b2195d;hpb=7f893258f6e9298a24fbfced5331e4867df4fd6c diff --git a/crypto/evp/e_aes_cbc_hmac_sha1.c b/crypto/evp/e_aes_cbc_hmac_sha1.c index 583fe8cc48..ec76393576 100644 --- a/crypto/evp/e_aes_cbc_hmac_sha1.c +++ b/crypto/evp/e_aes_cbc_hmac_sha1.c @@ -71,8 +71,8 @@ #define EVP_CIPH_FLAG_DEFAULT_ASN1 0 #endif -#if !defined(EVP_CIPH_FLAG_TLS11_MULTI_BLOCK) -#define EVP_CIPH_FLAG_TLS11_MULTI_BLOCK 0 +#if !defined(EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK) +#define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0 #endif #define TLS1_1_VERSION 0x0302 @@ -95,7 +95,7 @@ typedef struct defined(_M_AMD64) || defined(_M_X64) || \ defined(__INTEL__) ) -extern unsigned int OPENSSL_ia32cap_P[2]; +extern unsigned int OPENSSL_ia32cap_P[3]; #define AESNI_CAPABLE (1<<(57-32)) int aesni_set_encrypt_key(const unsigned char *userKey, int bits, @@ -113,6 +113,10 @@ void aesni_cbc_sha1_enc (const void *inp, void *out, size_t blocks, const AES_KEY *key, unsigned char iv[16], SHA_CTX *ctx,const void *in0); +void aesni256_cbc_sha1_dec (const void *inp, void *out, size_t blocks, + const AES_KEY *key, unsigned char iv[16], + SHA_CTX *ctx,const void *in0); + #define data(ctx) ((EVP_AES_HMAC_SHA1 *)(ctx)->cipher_data) static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx, @@ -137,6 +141,7 @@ static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx, } #define STITCHED_CALL +#undef STITCHED_DECRYPT_CALL #if !defined(STITCHED_CALL) #define aes_off 0 @@ -177,7 +182,7 @@ static void sha1_update(SHA_CTX *c,const void *data,size_t len) #endif #define SHA1_Update sha1_update -#if EVP_CIPH_FLAG_TLS11_MULTI_BLOCK +#if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK typedef struct { unsigned int A[8],B[8],C[8],D[8],E[8]; } SHA1_MB_CTX; typedef struct { const unsigned char *ptr; int blocks; } HASH_DESC; @@ -185,11 +190,11 @@ typedef struct { const unsigned char *ptr; int blocks; } HASH_DESC; void sha1_multi_block(SHA1_MB_CTX *,const HASH_DESC *,int); typedef struct { const unsigned char *inp; unsigned char *out; - int blocks; double iv[2]; } CIPH_DESC; + int blocks; u64 iv[2]; } CIPH_DESC; void aesni_multi_cbc_encrypt(CIPH_DESC *,void *,int); -static size_t tls11_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, +static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, unsigned char *out, const unsigned char *inp, size_t inp_len, int n4x) /* n4x is 1 or 2 */ { @@ -200,8 +205,15 @@ static size_t tls11_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, u32 d[32]; u8 c[128]; } blocks[8]; SHA1_MB_CTX *ctx; - unsigned int frag, last, packlen, i, x4=4*n4x; + unsigned int frag, last, packlen, i, x4=4*n4x, minblocks, processed=0; size_t ret = 0; + u8 *IVs; +#if defined(BSWAP8) + u64 seqnum; +#endif + + if (RAND_bytes((IVs=blocks[0].c),16*x4)<=0) /* ask for IVs in bulk */ + return 0; ctx = (SHA1_MB_CTX *)(storage+32-((size_t)storage%32)); /* align */ @@ -212,11 +224,31 @@ static size_t tls11_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, last -= x4-1; } + packlen = 5+16+((frag+20+16)&-16); + + /* populate descriptors with pointers and IVs */ hash_d[0].ptr = inp; - for (i=1;imd.data,8); + seqnum = BSWAP8(blocks[0].q[0]); +#endif for (i=0;iA[i] = key->md.h0; ctx->B[i] = key->md.h1; @@ -226,23 +258,19 @@ static size_t tls11_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, /* fix seqnum */ #if defined(BSWAP8) - blocks[i].q[0] = BSWAP8(BSWAP8(*(u64*)key->md.data)+i); + blocks[i].q[0] = BSWAP8(seqnum+i); #else - blocks[i].c[7] += key->md.data[7]+i; - if (blocks[i].c[7] < i) { - int j; - - for (j=6;j>=0;j--) { - if (blocks[i].c[j]=key->md.data[j]+1) break; - } + for (carry=i,j=8;j--;) { + blocks[i].c[j] = ((u8*)key->md.data)[j]+carry; + carry = (blocks[i].c[j]-carry)>>(sizeof(carry)*8-1); } #endif - blocks[i].c[8] = key->md.data[8]; - blocks[i].c[9] = key->md.data[9]; - blocks[i].c[10] = key->md.data[10]; + blocks[i].c[8] = ((u8*)key->md.data)[8]; + blocks[i].c[9] = ((u8*)key->md.data)[9]; + blocks[i].c[10] = ((u8*)key->md.data)[10]; /* fix length */ - blocks[i].c[11] = (unsigned char)(len>>8); - blocks[i].c[12] = (unsigned char)(len); + blocks[i].c[11] = (u8)(len>>8); + blocks[i].c[12] = (u8)(len); memcpy(blocks[i].c+13,hash_d[i].ptr,64-13); hash_d[i].ptr += 64-13; @@ -252,7 +280,42 @@ static size_t tls11_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, edges[i].blocks = 1; } + /* hash 13-byte headers and first 64-13 bytes of inputs */ sha1_multi_block(ctx,edges,n4x); + /* hash bulk inputs */ +#define MAXCHUNKSIZE 2048 +#if MAXCHUNKSIZE%64 +#error "MAXCHUNKSIZE is not divisible by 64" +#elif MAXCHUNKSIZE + /* goal is to minimize pressure on L1 cache by moving + * in shorter steps, so that hashed data is still in + * the cache by the time we encrypt it */ + minblocks = ((frag<=last ? frag : last)-(64-13))/64; + if (minblocks>MAXCHUNKSIZE/64) { + for (i=0;iks,n4x); + + for (i=0;iMAXCHUNKSIZE/64); + } +#endif +#undef MAXCHUNKSIZE sha1_multi_block(ctx,hash_d,n4x); memset(blocks,0,sizeof(blocks)); @@ -261,25 +324,35 @@ static size_t tls11_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, off = hash_d[i].blocks*64; const unsigned char *ptr = hash_d[i].ptr+off; - off = len-(64-13)-off; /* remainder actually */ + off = (len-processed)-(64-13)-off; /* remainder actually */ memcpy(blocks[i].c,ptr,off); blocks[i].c[off]=0x80; len += 64+13; /* 64 is HMAC header */ len *= 8; /* convert to bits */ if (off<(64-8)) { +#ifdef BSWAP4 blocks[i].d[15] = BSWAP4(len); +#else + PUTU32(blocks[i].c+60,len); +#endif edges[i].blocks = 1; } else { +#ifdef BSWAP4 blocks[i].d[31] = BSWAP4(len); +#else + PUTU32(blocks[i].c+124,len); +#endif edges[i].blocks = 2; } edges[i].ptr = blocks[i].c; } + /* hash input tails and finalize */ sha1_multi_block(ctx,edges,n4x); memset(blocks,0,sizeof(blocks)); for (i=0;iA[i]); ctx->A[i] = key->tail.h0; blocks[i].d[1] = BSWAP4(ctx->B[i]); ctx->B[i] = key->tail.h1; blocks[i].d[2] = BSWAP4(ctx->C[i]); ctx->C[i] = key->tail.h2; @@ -287,63 +360,64 @@ static size_t tls11_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, blocks[i].d[4] = BSWAP4(ctx->E[i]); ctx->E[i] = key->tail.h4; blocks[i].c[20] = 0x80; blocks[i].d[15] = BSWAP4((64+20)*8); +#else + PUTU32(blocks[i].c+0,ctx->A[i]); ctx->A[i] = key->tail.h0; + PUTU32(blocks[i].c+4,ctx->B[i]); ctx->B[i] = key->tail.h1; + PUTU32(blocks[i].c+8,ctx->C[i]); ctx->C[i] = key->tail.h2; + PUTU32(blocks[i].c+12,ctx->D[i]); ctx->D[i] = key->tail.h3; + PUTU32(blocks[i].c+16,ctx->E[i]); ctx->E[i] = key->tail.h4; + blocks[i].c[20] = 0x80; + PUTU32(blocks[i].c+60,(64+20)*8); +#endif edges[i].ptr = blocks[i].c; edges[i].blocks = 1; } + /* finalize MACs */ sha1_multi_block(ctx,edges,n4x); - packlen = 5+16+((frag+20+16)&-16); - - out += (packlen<<(1+n4x))-packlen; - inp += (frag<<(1+n4x))-frag; - - for (i=x4-1;;i--) { + for (i=0;iA[i]); - ((u32 *)out)[1] = BSWAP4(ctx->B[i]); - ((u32 *)out)[2] = BSWAP4(ctx->C[i]); - ((u32 *)out)[3] = BSWAP4(ctx->D[i]); - ((u32 *)out)[4] = BSWAP4(ctx->E[i]); + /* write MAC */ + PUTU32(out+0,ctx->A[i]); + PUTU32(out+4,ctx->B[i]); + PUTU32(out+8,ctx->C[i]); + PUTU32(out+12,ctx->D[i]); + PUTU32(out+16,ctx->E[i]); out += 20; - len += 20+16; + len += 20; + /* pad */ pad = 15-len%16; for (j=0;j<=pad;j++) *(out++) = pad; len += pad+1; - ciph_d[i].blocks = len/16; + ciph_d[i].blocks = (len-processed)/16; + len += 16; /* account for explicit iv */ /* arrange header */ - out0[0] = key->md.data[8]; - out0[1] = key->md.data[9]; - out0[2] = key->md.data[10]; - out0[3] = (unsigned char)(len>>8); - out0[4] = (unsigned char)(len); - - /* explicit iv */ - RAND_bytes((u8 *)ciph_d[i].iv, 16); - memcpy(&out[5], ciph_d[i].iv, 16); + out0[0] = ((u8*)key->md.data)[8]; + out0[1] = ((u8*)key->md.data)[9]; + out0[2] = ((u8*)key->md.data)[10]; + out0[3] = (u8)(len>>8); + out0[4] = (u8)(len); ret += len+5; - - if (i==0) break; - - out = out0-packlen; + inp += frag; } aesni_multi_cbc_encrypt(ciph_d,&key->ks,n4x); + OPENSSL_cleanse(blocks,sizeof(blocks)); + OPENSSL_cleanse(ctx,sizeof(*ctx)); + return ret; } #endif @@ -421,28 +495,45 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, /* arrange cache line alignment */ pmac = (void *)(((size_t)mac.c+31)&((size_t)0-32)); - /* decrypt HMAC|padding at once */ - aesni_cbc_encrypt(in,out,len, - &key->ks,ctx->iv,0); - - if (plen) { /* "TLS" mode of operation */ + if (plen != NO_PAYLOAD_LENGTH) { /* "TLS" mode of operation */ size_t inp_len, mask, j, i; unsigned int res, maxpad, pad, bitlen; int ret = 1; union { unsigned int u[SHA_LBLOCK]; unsigned char c[SHA_CBLOCK]; } *data = (void *)key->md.data; +#if defined(STITCHED_DECRYPT_CALL) + unsigned char tail_iv[AES_BLOCK_SIZE]; + int stitch=0; +#endif if ((key->aux.tls_aad[plen-4]<<8|key->aux.tls_aad[plen-3]) - >= TLS1_1_VERSION) - iv = AES_BLOCK_SIZE; - - if (len<(iv+SHA_DIGEST_LENGTH+1)) + >= TLS1_1_VERSION) { + if (len<(AES_BLOCK_SIZE+SHA_DIGEST_LENGTH+1)) + return 0; + + /* omit explicit iv */ + memcpy(ctx->iv,in,AES_BLOCK_SIZE); + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + len -= AES_BLOCK_SIZE; + } + else if (len<(SHA_DIGEST_LENGTH+1)) return 0; - /* omit explicit iv */ - out += iv; - len -= iv; +#if defined(STITCHED_DECRYPT_CALL) + if (len>=1024 && ctx->key_len==32) { + /* decrypt last block */ + memcpy(tail_iv,in+len-2*AES_BLOCK_SIZE,AES_BLOCK_SIZE); + aesni_cbc_encrypt(in+len-AES_BLOCK_SIZE, + out+len-AES_BLOCK_SIZE,AES_BLOCK_SIZE, + &key->ks,tail_iv,0); + stitch=1; + } else +#endif + /* decrypt HMAC|padding at once */ + aesni_cbc_encrypt(in,out,len, + &key->ks,ctx->iv,0); /* figure out payload length */ pad = out[len-1]; @@ -462,6 +553,30 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, key->md = key->head; SHA1_Update(&key->md,key->aux.tls_aad,plen); +#if defined(STITCHED_DECRYPT_CALL) + if (stitch) { + blocks = (len-(256+32+SHA_CBLOCK))/SHA_CBLOCK; + aes_off = len-AES_BLOCK_SIZE-blocks*SHA_CBLOCK; + sha_off = SHA_CBLOCK-plen; + + aesni_cbc_encrypt(in,out,aes_off, + &key->ks,ctx->iv,0); + + SHA1_Update(&key->md,out,sha_off); + aesni256_cbc_sha1_dec(in+aes_off, + out+aes_off,blocks,&key->ks,ctx->iv, + &key->md,out+sha_off); + + sha_off += blocks*=SHA_CBLOCK; + out += sha_off; + len -= sha_off; + inp_len -= sha_off; + + key->md.Nl += (blocks<<3); /* at most 18 bits */ + memcpy(ctx->iv,tail_iv,AES_BLOCK_SIZE); + } +#endif + #if 1 len -= SHA_DIGEST_LENGTH; /* amend mac */ if (len>=(256+SHA_CBLOCK)) { @@ -616,6 +731,34 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, #endif return ret; } else { +#if defined(STITCHED_DECRYPT_CALL) + if (len>=1024 && ctx->key_len==32) { + if (sha_off%=SHA_CBLOCK) + blocks = (len-3*SHA_CBLOCK)/SHA_CBLOCK; + else + blocks = (len-2*SHA_CBLOCK)/SHA_CBLOCK; + aes_off = len-blocks*SHA_CBLOCK; + + aesni_cbc_encrypt(in,out,aes_off, + &key->ks,ctx->iv,0); + SHA1_Update(&key->md,out,sha_off); + aesni256_cbc_sha1_dec(in+aes_off, + out+aes_off,blocks,&key->ks,ctx->iv, + &key->md,out+sha_off); + + sha_off += blocks*=SHA_CBLOCK; + out += sha_off; + len -= sha_off; + + key->md.Nh += blocks>>29; + key->md.Nl += blocks<<=3; + if (key->md.Nl<(unsigned int)blocks) key->md.Nh++; + } else +#endif + /* decrypt HMAC|padding at once */ + aesni_cbc_encrypt(in,out,len, + &key->ks,ctx->iv,0); + SHA1_Update(&key->md,out,len); } } @@ -686,15 +829,17 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void return SHA_DIGEST_LENGTH; } } -#if EVP_EVP_CIPH_FLAG_TLS11_MULTI_BLOCK - case EVP_CTRL_TLS11_MULTI_BLOCK_AAD: +#if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK + case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE: + return (int)(5+16+((arg+20+16)&-16)); + case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD: { - EVP_CTRL_TLS11_MULTI_BLOCK_PARAM *param = - (EVP_CTRL_TLS11_MULTI_BLOCK_PARAM *)ptr; + EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param = + (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *)ptr; unsigned int n4x=1, x4; unsigned int frag, last, packlen, inp_len; - if (arginp[11]<<8|param->inp[12]; @@ -702,10 +847,18 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void { if ((param->inp[9]<<8|param->inp[10]) < TLS1_1_VERSION) return -1; - - if (inp_len<2048) return -1; /* too short */ - if (inp_len>=6144) n4x=2; + if (inp_len) + { + if (inp_len<4096) return 0; /* too short */ + + if (inp_len>=8192 && OPENSSL_ia32cap_P[2]&(1<<5)) + n4x=2; /* AVX2 */ + } + else if ((n4x=param->interleave/4) && n4x<=2) + inp_len = param->len; + else + return -1; key->md = key->head; SHA1_Update(&key->md,param->inp,13); @@ -720,7 +873,7 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void } packlen = 5+16+((frag+20+16)&-16); - packlen = (packlen<<(1+n4x))-packlen; + packlen = (packlen<interleave = x4; @@ -730,15 +883,15 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void else return -1; /* not yet */ } - case EVP_CTRL_TLS11_MULTI_BLOCK_ENCRYPT: + case EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT: { - EVP_CTRL_TLS11_MULTI_BLOCK_PARAM *param = - (EVP_CTRL_TLS11_MULTI_BLOCK_PARAM *)ptr; + EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param = + (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *)ptr; - return tls11_multi_block_encrypt(key,param->out,param->inp, + return (int)tls1_1_multi_block_encrypt(key,param->out,param->inp, param->len,param->interleave/4); } - case EVP_CTRL_TLS11_MULTI_BLOCK_DECRYPT: + case EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT: #endif default: return -1; @@ -754,7 +907,7 @@ static EVP_CIPHER aesni_128_cbc_hmac_sha1_cipher = #endif 16,16,16, EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1| - EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS11_MULTI_BLOCK, + EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK, aesni_cbc_hmac_sha1_init_key, aesni_cbc_hmac_sha1_cipher, NULL, @@ -774,7 +927,7 @@ static EVP_CIPHER aesni_256_cbc_hmac_sha1_cipher = #endif 16,32,16, EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1| - EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS11_MULTI_BLOCK, + EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK, aesni_cbc_hmac_sha1_init_key, aesni_cbc_hmac_sha1_cipher, NULL,