X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fevp%2Fe_aes_cbc_hmac_sha1.c;h=ec76393576800637b0035dd056cf821fd8f5a412;hp=15e96386b135220f76c7358d7ef8bf9f6601b280;hb=2893a302a9b6a70161d1859d985a52af11b2195d;hpb=a69c0a1be5c619a74c02fcef05be6142d4700f62 diff --git a/crypto/evp/e_aes_cbc_hmac_sha1.c b/crypto/evp/e_aes_cbc_hmac_sha1.c index 15e96386b1..ec76393576 100644 --- a/crypto/evp/e_aes_cbc_hmac_sha1.c +++ b/crypto/evp/e_aes_cbc_hmac_sha1.c @@ -113,6 +113,10 @@ void aesni_cbc_sha1_enc (const void *inp, void *out, size_t blocks, const AES_KEY *key, unsigned char iv[16], SHA_CTX *ctx,const void *in0); +void aesni256_cbc_sha1_dec (const void *inp, void *out, size_t blocks, + const AES_KEY *key, unsigned char iv[16], + SHA_CTX *ctx,const void *in0); + #define data(ctx) ((EVP_AES_HMAC_SHA1 *)(ctx)->cipher_data) static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx, @@ -137,6 +141,7 @@ static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx, } #define STITCHED_CALL +#undef STITCHED_DECRYPT_CALL #if !defined(STITCHED_CALL) #define aes_off 0 @@ -200,9 +205,15 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, u32 d[32]; u8 c[128]; } blocks[8]; SHA1_MB_CTX *ctx; - unsigned int frag, last, packlen, i, x4=4*n4x; + unsigned int frag, last, packlen, i, x4=4*n4x, minblocks, processed=0; size_t ret = 0; u8 *IVs; +#if defined(BSWAP8) + u64 seqnum; +#endif + + if (RAND_bytes((IVs=blocks[0].c),16*x4)<=0) /* ask for IVs in bulk */ + return 0; ctx = (SHA1_MB_CTX *)(storage+32-((size_t)storage%32)); /* align */ @@ -213,11 +224,31 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, last -= x4-1; } + packlen = 5+16+((frag+20+16)&-16); + + /* populate descriptors with pointers and IVs */ hash_d[0].ptr = inp; - for (i=1;imd.data,8); + seqnum = BSWAP8(blocks[0].q[0]); +#endif for (i=0;iA[i] = key->md.h0; ctx->B[i] = key->md.h1; @@ -227,15 +258,11 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, /* fix seqnum */ #if defined(BSWAP8) - blocks[i].q[0] = BSWAP8(BSWAP8(*(u64*)key->md.data)+i); + blocks[i].q[0] = BSWAP8(seqnum+i); #else - blocks[i].c[7] += ((u8*)key->md.data)[7]+i; - if (blocks[i].c[7] < i) { - int j; - - for (j=6;j>=0;j--) { - if (blocks[i].c[j]=((u8*)key->md.data)[j]+1) break; - } + for (carry=i,j=8;j--;) { + blocks[i].c[j] = ((u8*)key->md.data)[j]+carry; + carry = (blocks[i].c[j]-carry)>>(sizeof(carry)*8-1); } #endif blocks[i].c[8] = ((u8*)key->md.data)[8]; @@ -256,6 +283,39 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, /* hash 13-byte headers and first 64-13 bytes of inputs */ sha1_multi_block(ctx,edges,n4x); /* hash bulk inputs */ +#define MAXCHUNKSIZE 2048 +#if MAXCHUNKSIZE%64 +#error "MAXCHUNKSIZE is not divisible by 64" +#elif MAXCHUNKSIZE + /* goal is to minimize pressure on L1 cache by moving + * in shorter steps, so that hashed data is still in + * the cache by the time we encrypt it */ + minblocks = ((frag<=last ? frag : last)-(64-13))/64; + if (minblocks>MAXCHUNKSIZE/64) { + for (i=0;iks,n4x); + + for (i=0;iMAXCHUNKSIZE/64); + } +#endif +#undef MAXCHUNKSIZE sha1_multi_block(ctx,hash_d,n4x); memset(blocks,0,sizeof(blocks)); @@ -264,16 +324,24 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, off = hash_d[i].blocks*64; const unsigned char *ptr = hash_d[i].ptr+off; - off = len-(64-13)-off; /* remainder actually */ + off = (len-processed)-(64-13)-off; /* remainder actually */ memcpy(blocks[i].c,ptr,off); blocks[i].c[off]=0x80; len += 64+13; /* 64 is HMAC header */ len *= 8; /* convert to bits */ if (off<(64-8)) { +#ifdef BSWAP4 blocks[i].d[15] = BSWAP4(len); +#else + PUTU32(blocks[i].c+60,len); +#endif edges[i].blocks = 1; } else { +#ifdef BSWAP4 blocks[i].d[31] = BSWAP4(len); +#else + PUTU32(blocks[i].c+124,len); +#endif edges[i].blocks = 2; } edges[i].ptr = blocks[i].c; @@ -284,6 +352,7 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, memset(blocks,0,sizeof(blocks)); for (i=0;iA[i]); ctx->A[i] = key->tail.h0; blocks[i].d[1] = BSWAP4(ctx->B[i]); ctx->B[i] = key->tail.h1; blocks[i].d[2] = BSWAP4(ctx->C[i]); ctx->C[i] = key->tail.h2; @@ -291,6 +360,15 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, blocks[i].d[4] = BSWAP4(ctx->E[i]); ctx->E[i] = key->tail.h4; blocks[i].c[20] = 0x80; blocks[i].d[15] = BSWAP4((64+20)*8); +#else + PUTU32(blocks[i].c+0,ctx->A[i]); ctx->A[i] = key->tail.h0; + PUTU32(blocks[i].c+4,ctx->B[i]); ctx->B[i] = key->tail.h1; + PUTU32(blocks[i].c+8,ctx->C[i]); ctx->C[i] = key->tail.h2; + PUTU32(blocks[i].c+12,ctx->D[i]); ctx->D[i] = key->tail.h3; + PUTU32(blocks[i].c+16,ctx->E[i]); ctx->E[i] = key->tail.h4; + blocks[i].c[20] = 0x80; + PUTU32(blocks[i].c+60,(64+20)*8); +#endif edges[i].ptr = blocks[i].c; edges[i].blocks = 1; } @@ -298,30 +376,21 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, /* finalize MACs */ sha1_multi_block(ctx,edges,n4x); - packlen = 5+16+((frag+20+16)&-16); - - out += (packlen<<(1+n4x))-packlen; - inp += (frag<<(1+n4x))-frag; - - RAND_bytes((IVs=blocks[0].c),16*x4); /* ask for IVs in bulk */ - - for (i=x4-1;;i--) { + for (i=0;iA[i]); - ((u32 *)out)[1] = BSWAP4(ctx->B[i]); - ((u32 *)out)[2] = BSWAP4(ctx->C[i]); - ((u32 *)out)[3] = BSWAP4(ctx->D[i]); - ((u32 *)out)[4] = BSWAP4(ctx->E[i]); + PUTU32(out+0,ctx->A[i]); + PUTU32(out+4,ctx->B[i]); + PUTU32(out+8,ctx->C[i]); + PUTU32(out+12,ctx->D[i]); + PUTU32(out+16,ctx->E[i]); out += 20; len += 20; @@ -330,7 +399,7 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, for (j=0;j<=pad;j++) *(out++) = pad; len += pad+1; - ciph_d[i].blocks = len/16; + ciph_d[i].blocks = (len-processed)/16; len += 16; /* account for explicit iv */ /* arrange header */ @@ -340,17 +409,8 @@ static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, out0[3] = (u8)(len>>8); out0[4] = (u8)(len); - /* explicit iv */ - memcpy(ciph_d[i].iv, IVs, 16); - memcpy(&out0[5], IVs, 16); - ret += len+5; - - if (i==0) break; - - out = out0-packlen; - inp -= frag; - IVs += 16; + inp += frag; } aesni_multi_cbc_encrypt(ciph_d,&key->ks,n4x); @@ -435,28 +495,45 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, /* arrange cache line alignment */ pmac = (void *)(((size_t)mac.c+31)&((size_t)0-32)); - /* decrypt HMAC|padding at once */ - aesni_cbc_encrypt(in,out,len, - &key->ks,ctx->iv,0); - - if (plen) { /* "TLS" mode of operation */ + if (plen != NO_PAYLOAD_LENGTH) { /* "TLS" mode of operation */ size_t inp_len, mask, j, i; unsigned int res, maxpad, pad, bitlen; int ret = 1; union { unsigned int u[SHA_LBLOCK]; unsigned char c[SHA_CBLOCK]; } *data = (void *)key->md.data; +#if defined(STITCHED_DECRYPT_CALL) + unsigned char tail_iv[AES_BLOCK_SIZE]; + int stitch=0; +#endif if ((key->aux.tls_aad[plen-4]<<8|key->aux.tls_aad[plen-3]) - >= TLS1_1_VERSION) - iv = AES_BLOCK_SIZE; - - if (len<(iv+SHA_DIGEST_LENGTH+1)) + >= TLS1_1_VERSION) { + if (len<(AES_BLOCK_SIZE+SHA_DIGEST_LENGTH+1)) + return 0; + + /* omit explicit iv */ + memcpy(ctx->iv,in,AES_BLOCK_SIZE); + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + len -= AES_BLOCK_SIZE; + } + else if (len<(SHA_DIGEST_LENGTH+1)) return 0; - /* omit explicit iv */ - out += iv; - len -= iv; +#if defined(STITCHED_DECRYPT_CALL) + if (len>=1024 && ctx->key_len==32) { + /* decrypt last block */ + memcpy(tail_iv,in+len-2*AES_BLOCK_SIZE,AES_BLOCK_SIZE); + aesni_cbc_encrypt(in+len-AES_BLOCK_SIZE, + out+len-AES_BLOCK_SIZE,AES_BLOCK_SIZE, + &key->ks,tail_iv,0); + stitch=1; + } else +#endif + /* decrypt HMAC|padding at once */ + aesni_cbc_encrypt(in,out,len, + &key->ks,ctx->iv,0); /* figure out payload length */ pad = out[len-1]; @@ -476,6 +553,30 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, key->md = key->head; SHA1_Update(&key->md,key->aux.tls_aad,plen); +#if defined(STITCHED_DECRYPT_CALL) + if (stitch) { + blocks = (len-(256+32+SHA_CBLOCK))/SHA_CBLOCK; + aes_off = len-AES_BLOCK_SIZE-blocks*SHA_CBLOCK; + sha_off = SHA_CBLOCK-plen; + + aesni_cbc_encrypt(in,out,aes_off, + &key->ks,ctx->iv,0); + + SHA1_Update(&key->md,out,sha_off); + aesni256_cbc_sha1_dec(in+aes_off, + out+aes_off,blocks,&key->ks,ctx->iv, + &key->md,out+sha_off); + + sha_off += blocks*=SHA_CBLOCK; + out += sha_off; + len -= sha_off; + inp_len -= sha_off; + + key->md.Nl += (blocks<<3); /* at most 18 bits */ + memcpy(ctx->iv,tail_iv,AES_BLOCK_SIZE); + } +#endif + #if 1 len -= SHA_DIGEST_LENGTH; /* amend mac */ if (len>=(256+SHA_CBLOCK)) { @@ -630,6 +731,34 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, #endif return ret; } else { +#if defined(STITCHED_DECRYPT_CALL) + if (len>=1024 && ctx->key_len==32) { + if (sha_off%=SHA_CBLOCK) + blocks = (len-3*SHA_CBLOCK)/SHA_CBLOCK; + else + blocks = (len-2*SHA_CBLOCK)/SHA_CBLOCK; + aes_off = len-blocks*SHA_CBLOCK; + + aesni_cbc_encrypt(in,out,aes_off, + &key->ks,ctx->iv,0); + SHA1_Update(&key->md,out,sha_off); + aesni256_cbc_sha1_dec(in+aes_off, + out+aes_off,blocks,&key->ks,ctx->iv, + &key->md,out+sha_off); + + sha_off += blocks*=SHA_CBLOCK; + out += sha_off; + len -= sha_off; + + key->md.Nh += blocks>>29; + key->md.Nl += blocks<<=3; + if (key->md.Nl<(unsigned int)blocks) key->md.Nh++; + } else +#endif + /* decrypt HMAC|padding at once */ + aesni_cbc_encrypt(in,out,len, + &key->ks,ctx->iv,0); + SHA1_Update(&key->md,out,len); } } @@ -701,6 +830,8 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void } } #if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK + case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE: + return (int)(5+16+((arg+20+16)&-16)); case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD: { EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param = @@ -708,7 +839,7 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void unsigned int n4x=1, x4; unsigned int frag, last, packlen, inp_len; - if (arginp[11]<<8|param->inp[12]; @@ -717,9 +848,17 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void if ((param->inp[9]<<8|param->inp[10]) < TLS1_1_VERSION) return -1; - if (inp_len<4096) return 0; /* too short */ + if (inp_len) + { + if (inp_len<4096) return 0; /* too short */ - if (OPENSSL_ia32cap_P[2]&(1<<5)) n4x=2; /* AVX2 */ + if (inp_len>=8192 && OPENSSL_ia32cap_P[2]&(1<<5)) + n4x=2; /* AVX2 */ + } + else if ((n4x=param->interleave/4) && n4x<=2) + inp_len = param->len; + else + return -1; key->md = key->head; SHA1_Update(&key->md,param->inp,13);