X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fmodes%2Fasm%2Fghash-s390x.pl;h=39096b423ad805d7e6475e6aa42bdc8b6ed041ae;hp=16ad034fc12d61c165d7d21adde534ab89d09d3b;hb=b9e3d7e0f6678a991621cfbc4b11ace7860031a0;hpb=e822c756b66024d49ab936bf77b745206660fcd2 diff --git a/crypto/modes/asm/ghash-s390x.pl b/crypto/modes/asm/ghash-s390x.pl index 16ad034fc1..39096b423a 100644 --- a/crypto/modes/asm/ghash-s390x.pl +++ b/crypto/modes/asm/ghash-s390x.pl @@ -28,6 +28,15 @@ # remains z/Architecture specific. On z990 it was measured to perform # 2.8x better than 32-bit code generated by gcc 4.3. +# March 2011. +# +# Support for hardware KIMD-GHASH is verified to produce correct +# result and therefore is engaged. On z196 it was measured to process +# 8KB buffer ~7 times faster than software implementation. It's not as +# impressive for smaller buffer sizes and for the smallest 16-byte buffer +# it's actually almost 2 times slower. Which is the reason why +# KIMD-GHASH is not used in gcm_gmult_4bit. + $flavour = shift; if ($flavour =~ /3[12]/) { @@ -41,7 +50,7 @@ if ($flavour =~ /3[12]/) { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; -$softonly=1; # disable hardware support for now +$softonly=0; $Zhi="%r0"; $Zlo="%r1"; @@ -70,7 +79,7 @@ $code.=<<___; .align 32 gcm_gmult_4bit: ___ -$code.=<<___ if(!$softonly); +$code.=<<___ if(!$softonly && 0); # hardware is slow for single block... 
larl %r1,OPENSSL_s390xcap_P lg %r0,0(%r1) tmhl %r0,0x4000 # check for message-security-assist @@ -129,7 +138,7 @@ $code.=<<___ if(!$softonly); .align 32 .Lsoft_ghash: ___ -$cdoe.=<<___ if ($flavour =~ /3[12]/); +$code.=<<___ if ($flavour =~ /3[12]/); llgfr $len,$len ___ $code.=<<___; @@ -177,13 +186,13 @@ $code.=<<___; sllg $rem1,$Zlo,3 xgr $Zlo,$tmp ngr $rem1,$x78 + sllg $tmp,$Zhi,60 j .Lghash_inner .align 16 .Lghash_inner: srlg $Zlo,$Zlo,4 - sllg $tmp,$Zhi,60 - xg $Zlo,8($nlo,$Htbl) srlg $Zhi,$Zhi,4 + xg $Zlo,8($nlo,$Htbl) llgc $xi,0($cnt,$Xi) xg $Zhi,0($nlo,$Htbl) sllg $nlo,$xi,4 @@ -204,9 +213,9 @@ $code.=<<___; sllg $rem1,$Zlo,3 xgr $Zlo,$tmp ngr $rem1,$x78 + sllg $tmp,$Zhi,60 brct $cnt,.Lghash_inner - sllg $tmp,$Zhi,60 srlg $Zlo,$Zlo,4 srlg $Zhi,$Zhi,4 xg $Zlo,8($nlo,$Htbl)