X-Git-Url: https://git.openssl.org/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fmodes%2Fasm%2Fghash-x86_64.pl;h=cd93c0f95ec0da61f87913ab505df8be62a25130;hp=b80be6c742d17e485195c719a63cb6b81e49a0f4;hb=46bf83f07ae1ba7fda435c90af93960e77159f4b;hpb=d364506a24a59244953730d917df02876ff2aaca diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl index b80be6c742..cd93c0f95e 100644 --- a/crypto/modes/asm/ghash-x86_64.pl +++ b/crypto/modes/asm/ghash-x86_64.pl @@ -20,17 +20,20 @@ # gcc 3.4.x(*) assembler # # P4 28.6 14.0 +100% -# Opteron 18.5 7.7 +140% -# Core2 17.5 8.1(**) +115% +# Opteron 19.3 7.7 +150% +# Core2 17.8 8.1(**) +120% +# Atom 31.6 16.8 +88% +# VIA Nano 21.8 10.1 +115% # # (*) comparison is not completely fair, because C results are -# for vanilla "256B" implementation, not "528B";-) +# for vanilla "256B" implementation, while assembler results +# are for "528B";-) # (**) it's mystery [to me] why Core2 result is not same as for # Opteron; # May 2010 # -# Add PCLMULQDQ version performing at 2.07 cycles per processed byte. +# Add PCLMULQDQ version performing at 2.02 cycles per processed byte. # See ghash-x86.pl for background information and details about coding # techniques. # @@ -49,7 +52,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open STDOUT,"| $^X $xlate $flavour $output"; +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; # common register layout $nlo="%rax"; @@ -797,35 +801,7 @@ se_handler: ___ } -sub rex { - local *opcode=shift; - my ($dst,$src)=@_; - - if ($dst>=8 || $src>=8) { - $rex=0x40; - $rex|=0x04 if($dst>=8); - $rex|=0x01 if($src>=8); - push @opcode,$rex; - } -} - -sub pclmulqdq { - my $arg=shift; - my @opcode=(0x66); - - if ($arg=~/\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { - rex(\@opcode,$3,$2); - push @opcode,0x0f,0x3a,0x44; - push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M - my $c=$1; - push @opcode,$c=~/^0/?oct($c):$c; - return ".byte\t".join(',',@opcode); - } - return "pclmulqdq\t".$arg; -} - $code =~ s/\`([^\`]*)\`/eval($1)/gem; -$code =~ s/\bpclmulqdq\s+(\$.*%xmm[0-9]+).*$/pclmulqdq($1)/gem; print $code;