sha1-ppc.pl: shave off one cycle from BODY_20_39

author Andy Polyakov <appro@openssl.org>
Mon, 21 Jul 2014 13:29:09 +0000 (15:29 +0200)
committer Andy Polyakov <appro@openssl.org>
Mon, 21 Jul 2014 13:30:59 +0000 (15:30 +0200)
diff --git a/crypto/sha/asm/sha1-ppc.pl b/crypto/sha/asm/sha1-ppc.pl

index 24a5d065d9fdaac77006afc97a6f64d5e14d81fc..df5989610c4c70571e30499ce3bc7b69d02e59bc 100755 (executable)
--- a/crypto/sha/asm/sha1-ppc.pl
+++ b/crypto/sha/asm/sha1-ppc.pl
@@ -125,31 +125,31 @@ my ($i,$a,$b,$c,$d,$e,$f)=@_;
  my $j=$i+1;
  $code.=<<___ if ($i<79);
         add     $f,$K,$e
+       xor     $t0,$b,$d
         rotlwi  $e,$a,5
         xor     @X[$j%16],@X[$j%16],@X[($j+2)%16]
         add     $f,$f,@X[$i%16]
-       xor     $t0,$b,$c
+       xor     $t0,$t0,$c
         xor     @X[$j%16],@X[$j%16],@X[($j+8)%16]
-       add     $f,$f,$e
+       add     $f,$f,$t0
         rotlwi  $b,$b,30
-       xor     $t0,$t0,$d
         xor     @X[$j%16],@X[$j%16],@X[($j+13)%16]
-       add     $f,$f,$t0
+       add     $f,$f,$e
         rotlwi  @X[$j%16],@X[$j%16],1
  ___
  $code.=<<___ if ($i==79);
         add     $f,$K,$e
+       xor     $t0,$b,$d
         rotlwi  $e,$a,5
         lwz     r16,0($ctx)
         add     $f,$f,@X[$i%16]
-       xor     $t0,$b,$c
+       xor     $t0,$t0,$c
         lwz     r17,4($ctx)
-       add     $f,$f,$e
+       add     $f,$f,$t0
         rotlwi  $b,$b,30
         lwz     r18,8($ctx)
-       xor     $t0,$t0,$d
         lwz     r19,12($ctx)
-       add     $f,$f,$t0
+       add     $f,$f,$e
         lwz     r20,16($ctx)
  ___
  }
diff --git a/crypto/sha/asm/sha512p8-ppc.pl b/crypto/sha/asm/sha512p8-ppc.pl

index cb0268c9d6184d738db74c971e4290cda4af2350..a316b31a4f90da0d3da21ad848b3824c5ca91ea1 100755 (executable)
--- a/crypto/sha/asm/sha512p8-ppc.pl
+++ b/crypto/sha/asm/sha512p8-ppc.pl
@@ -13,8 +13,8 @@
  # always virtualized setup with possibly throttled processor.
  # Relative comparison is therefore more informative. This module is
  # ~60% faster than integer-only sha512-ppc.pl. To anchor to something
-# else, SHA256 is 16% slower than sha1-ppc.pl and 2.5x slower than
-# hardware-assisted aes-128-cbc encrypt. SHA512 is 33% faster than
+# else, SHA256 is 24% slower than sha1-ppc.pl and 2.5x slower than
+# hardware-assisted aes-128-cbc encrypt. SHA512 is 20% faster than
  # sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting
  # result is degree of computational resources' utilization. POWER8 is
  # "massively multi-threaded chip" and difference between single- and
author	Andy Polyakov <appro@openssl.org>
	Mon, 21 Jul 2014 13:29:09 +0000 (15:29 +0200)
committer	Andy Polyakov <appro@openssl.org>
	Mon, 21 Jul 2014 13:30:59 +0000 (15:30 +0200)
crypto/sha/asm/sha1-ppc.pl		patch | blob | history
crypto/sha/asm/sha512p8-ppc.pl		patch | blob | history