crypto/cast/asm/cast-586.pl: +5% on PIII and remove obsolete readme.
[openssl.git] / crypto / cast / asm / cast-586.pl
index 6be0bfe57245e75fd0f90fafaffffcf4de51bf03..ec2eab179d536d2cf3c5b8417377201340393de6 100644 (file)
@@ -1,9 +1,13 @@
 #!/usr/local/bin/perl
 
 #!/usr/local/bin/perl
 
-# define for pentium pro friendly version
+# This flag makes the inner loop one cycle longer, but generates
+# code that runs 30% faster on the pentium pro/II, 44% faster
+# on PIII, while only 7% slower on the pentium.
+# By default, this flag is on.
 $ppro=1;
 
 $ppro=1;
 
-push(@INC,"perlasm","../../perlasm");
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
 require "x86asm.pl";
 require "cbc.pl";
 
 require "x86asm.pl";
 require "cbc.pl";
 
@@ -139,11 +143,11 @@ sub E_CAST {
     &rotl(     $tmp4,          &LB($tmp1));
 
     if ($ppro) {
     &rotl(     $tmp4,          &LB($tmp1));
 
     if ($ppro) {
-       &mov(   $tmp2,          $tmp4);         # B
        &xor(   $tmp1,          $tmp1);
        &xor(   $tmp1,          $tmp1);
+       &mov(   $tmp2,          0xff);
        
        &movb(  &LB($tmp1),     &HB($tmp4));    # A
        
        &movb(  &LB($tmp1),     &HB($tmp4));    # A
-       &and(   $tmp2,          0xff);
+       &and(   $tmp2,          $tmp4);
 
        &shr(   $tmp4,          16);            #
        &xor(   $tmp3,          $tmp3);
 
        &shr(   $tmp4,          16);            #
        &xor(   $tmp3,          $tmp3);