X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fsha%2Fasm%2Fsha512-armv8.pl;h=717473b86e75f27ad160f8ffbfa5a3c6375c35c9;hp=6935ed6521569f01ff535c00f683810dba1b1ad6;hb=94376cccb4ed5b376220bffe0739140ea9dad8c8;hpb=cd91fd7c32428c0deb503f19b8061e0980476876 diff --git a/crypto/sha/asm/sha512-armv8.pl b/crypto/sha/asm/sha512-armv8.pl index 6935ed6521..717473b86e 100644 --- a/crypto/sha/asm/sha512-armv8.pl +++ b/crypto/sha/asm/sha512-armv8.pl @@ -14,16 +14,29 @@ # # SHA256-hw SHA256(*) SHA512 # Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) -# Cortex-A5x n/a n/a n/a +# Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) +# Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) # # (*) Software SHA256 results are of lesser relevance, presented # mostly for informational purposes. # (**) The result is a trade-off: it's possible to improve it by -# 10%, but at the cost of 20% loss on Cortex-A5x. +# 10% (or by 1 cycle per round), but at the cost of 20% loss +# on Cortex-A53 (or by 4 cycles per round). +# (***) Super-impressive coefficients over gcc-generated code are +# indication of some compiler "pathology", most notably code +# generated with -mgeneral-regs-only is significanty faster +# and the gap is only 40-90%. $flavour=shift; $output=shift; -open STDOUT,">$output"; + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or +die "can't locate arm-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; if ($output =~ /512/) { $BITS=512; @@ -147,6 +160,7 @@ $code.=<<___; .text +.extern OPENSSL_armcap_P .globl $func .type $func,%function .align 6 @@ -176,7 +190,7 @@ $code.=<<___; ldp $E,$F,[$ctx,#4*$SZ] add $num,$inp,$num,lsl#`log(16*$SZ)/log(2)` // end of input ldp $G,$H,[$ctx,#6*$SZ] - adr $Ktbl,K$BITS + adr $Ktbl,.LK$BITS stp $ctx,$num,[x29,#96] .Loop: @@ -226,8 +240,8 @@ $code.=<<___; .size $func,.-$func .align 6 -.type K$BITS,%object -K$BITS: +.type .LK$BITS,%object +.LK$BITS: ___ $code.=<<___ if ($SZ==8); .quad 0x428a2f98d728ae22,0x7137449123ef65cd @@ -292,7 +306,7 @@ $code.=<<___ if ($SZ==4); .long 0 //terminator ___ $code.=<<___; -.size K$BITS,.-K$BITS +.size .LK$BITS,.-.LK$BITS .align 3 .LOPENSSL_armcap_P: .quad OPENSSL_armcap_P-. @@ -317,7 +331,7 @@ sha256_block_armv8: add x29,sp,#0 ld1.32 {$ABCD,$EFGH},[$ctx] - adr $Ktbl,K256 + adr $Ktbl,.LK256 .Loop_hw: ld1 {@MSG[0]-@MSG[3]},[$inp],#64