X-Git-Url: https://git.openssl.org/?p=openssl.git;a=blobdiff_plain;f=crypto%2Faes%2Fasm%2Faesni-sha1-x86_64.pl;h=952b4cfdd668297ead9176cbc5ee68f3b3380fee;hp=ff0b068229cb4e6f32d939f754345b5a1b269b38;hb=b7f5503fa6e1feebec2ac12b8ddcb5b5672452a6;hpb=619b94667cc7a097f6d1e2123c4f4c2c85afb8f7 diff --git a/crypto/aes/asm/aesni-sha1-x86_64.pl b/crypto/aes/asm/aesni-sha1-x86_64.pl index ff0b068229..952b4cfdd6 100644 --- a/crypto/aes/asm/aesni-sha1-x86_64.pl +++ b/crypto/aes/asm/aesni-sha1-x86_64.pl @@ -25,6 +25,7 @@ # Sandy Bridge 5.05[+5.0(6.1)] 10.06(11.15) 5.98(7.05) +68%(+58%) # Ivy Bridge 5.05[+4.6] 9.65 5.54 +74% # Haswell 4.43[+3.6(4.2)] 8.00(8.58) 4.55(5.21) +75%(+65%) +# Skylake 2.63[+3.5(4.1)] 6.17(6.69) 4.23(4.44) +46%(+51%) # Bulldozer 5.77[+6.0] 11.72 6.37 +84% # # AES-192-CBC @@ -39,6 +40,7 @@ # Sandy Bridge 7.05 12.06(13.15) 7.12(7.72) +69%(+70%) # Ivy Bridge 7.05 11.65 7.12 +64% # Haswell 6.19 9.76(10.34) 6.21(6.25) +57%(+65%) +# Skylake 3.62 7.16(7.68) 4.56(4.76) +57%(+61$) # Bulldozer 8.00 13.95 8.25 +69% # # (*) There are two code paths: SSSE3 and AVX. See sha1-568.pl for @@ -94,6 +96,9 @@ $avx=1 if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && $avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && `ml64 2>&1` =~ /Version ([0-9]+)\./ && $1>=10); +$avx=1 if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/ && $2>=3.0); + +$shaext=1; ### set to zero if compiling for 1.0.1 $stitched_decrypt=0; @@ -119,6 +124,8 @@ aesni_cbc_sha1_enc: # caller should check for SSSE3 and AES-NI bits mov OPENSSL_ia32cap_P+0(%rip),%r10d mov OPENSSL_ia32cap_P+4(%rip),%r11 +___ +$code.=<<___ if ($shaext); bt \$61,%r11 # check SHA bit jc aesni_cbc_sha1_enc_shaext ___ @@ -722,7 +729,7 @@ ___ if ($stitched_decrypt) {{{ # reset ($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10"); -$j=$jj=$r=$sn=$rx=0; +$j=$jj=$r=$rx=0; $Xi=4; # reassign for Atom Silvermont (see above) @@ -990,7 +997,7 @@ $code.=<<___; .size aesni256_cbc_sha1_dec_ssse3,.-aesni256_cbc_sha1_dec_ssse3 ___ }}} -$j=$jj=$r=$sn=$rx=0; +$j=$jj=$r=$rx=0; if ($avx) { my ($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10"); @@ -1436,7 +1443,7 @@ ___ # reset ($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10"); -$j=$jj=$r=$sn=$rx=0; +$j=$jj=$r=$rx=0; $Xi=4; @aes256_dec = ( @@ -1657,7 +1664,7 @@ K_XX_XX: .asciz "AESNI-CBC+SHA1 stitch for x86_64, CRYPTOGAMS by " .align 64 ___ - {{{ + if ($shaext) {{{ ($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10"); $rounds="%r11d"; @@ -1676,7 +1683,7 @@ aesni_cbc_sha1_enc_shaext: mov `($win64?56:8)`(%rsp),$inp # load 7th argument ___ $code.=<<___ if ($win64); - lea `-8-4*16`(%rsp),%rsp + lea `-8-10*16`(%rsp),%rsp movaps %xmm6,-8-10*16(%rax) movaps %xmm7,-8-9*16(%rax) movaps %xmm8,-8-8*16(%rax) @@ -1867,7 +1874,21 @@ ssse3_handler: lea (%rsi,%r10),%r10 # epilogue label cmp %r10,%rbx # context->Rip>=epilogue label jae .Lcommon_seh_tail +___ +$code.=<<___ if ($shaext); + lea aesni_cbc_sha1_enc_shaext(%rip),%r10 + cmp %r10,%rbx + jb .Lseh_no_shaext + lea (%rax),%rsi + lea 512($context),%rdi # &context.Xmm6 + mov \$20,%ecx + .long 0xa548f3fc # cld; rep movsq + lea 168(%rax),%rax # adjust stack pointer + jmp .Lcommon_seh_tail +.Lseh_no_shaext: +___ +$code.=<<___; lea 96(%rax),%rsi lea 512($context),%rdi # &context.Xmm6 mov \$20,%ecx @@ -1939,6 +1960,11 @@ $code.=<<___ if ($avx); .rva .LSEH_end_aesni_cbc_sha1_enc_avx .rva .LSEH_info_aesni_cbc_sha1_enc_avx ___ +$code.=<<___ if ($shaext); + .rva .LSEH_begin_aesni_cbc_sha1_enc_shaext + .rva .LSEH_end_aesni_cbc_sha1_enc_shaext + .rva .LSEH_info_aesni_cbc_sha1_enc_shaext +___ $code.=<<___; .section .xdata .align 8 @@ -1953,6 +1979,12 @@ $code.=<<___ if ($avx); .rva ssse3_handler .rva .Lprologue_avx,.Lepilogue_avx # HandlerData[] ___ +$code.=<<___ if ($shaext); +.LSEH_info_aesni_cbc_sha1_enc_shaext: + .byte 9,0,0,0 + .rva ssse3_handler + .rva .Lprologue_shaext,.Lepilogue_shaext # HandlerData[] +___ } ####################################################################