Make pshufw optional and update performance table in sha512-sse2.pl
author Andy Polyakov <appro@openssl.org>
Thu, 3 Nov 2005 15:23:11 +0000 (15:23 +0000)
committer Andy Polyakov <appro@openssl.org>
Thu, 3 Nov 2005 15:23:11 +0000 (15:23 +0000)
[as per http://cvs.openssl.org/chngview?cn=14551].

crypto/perlasm/x86unix.pl
crypto/sha/asm/sha512-sse2.pl

index 38c7a6df4260637f03452dd3297cecf2dd43f296..8b6e502cfb1fc81e9ca1ab3e4ff34fcda2a86c5a 100644 (file)
@@ -227,9 +227,9 @@ sub main'pxor       { &out2("pxor",@_); }
 sub main'por   { &out2("por",@_); }
 sub main'pand  { &out2("pand",@_); }
 sub main'movq  {
-       local($p1,$p2)=@_;
-       if ($p1=~/^mm[0-7]$/ && $p2=~/^mm[0-7]$/)
-               # movq between mmx registers sinks Intel CPUs
+       local($p1,$p2,$optimize)=@_;
+       if ($optimize && $p1=~/^mm[0-7]$/ && $p2=~/^mm[0-7]$/)
+               # movq between mmx registers can sink Intel CPUs
                {       push(@out,"\tpshufw\t\$0xe4,%$p2,%$p1\n");      }
        else    {       &out2("movq",@_);                               }
        }
index 10902bf673d58891478000d7a0e27e5b2cdc7a9f..6c0a4c4d4e9adf1b1e738174b90b212c0c21ff5d 100644 (file)
@@ -22,7 +22,7 @@
 #
 #              2.4GHz P4       1.4GHz AMD32    1.4GHz AMD64(*)
 # SHA256/gcc(*)        54              43              59
-# SHA512/gcc   17              23              92
+# SHA512/gcc   21              24              92
 # SHA512/sse2  61(**)          57(**)
 # SHA512/icc   26              28
 # SHA256/icc(*)        65              54