PPC assembly pack: correct POWER9 results.
author: Andy Polyakov <appro@openssl.org>
Sat, 2 Jun 2018 12:03:27 +0000 (14:03 +0200)
committer: Andy Polyakov <appro@openssl.org>
Sun, 3 Jun 2018 19:20:06 +0000 (21:20 +0200)
As it turns out, the originally published results were skewed by "turbo"
mode. The VM apparently remains oblivious to dynamic frequency scaling
and reports that the processor operates at the "base" frequency at all
times, while the actual frequency gets increased under load.

Reviewed-by: Rich Salz <rsalz@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6406)

crypto/aes/asm/aesp8-ppc.pl
crypto/chacha/asm/chacha-ppc.pl
crypto/modes/asm/ghashp8-ppc.pl
crypto/poly1305/asm/poly1305-ppc.pl
crypto/poly1305/asm/poly1305-ppcfp.pl
crypto/poly1305/poly1305_ieee754.c
crypto/sha/asm/keccak1600-ppc64.pl
crypto/sha/asm/keccak1600p8-ppc.pl
crypto/sha/asm/sha512p8-ppc.pl

index 86709404cf6df4a36eb175dff918278e0df4be1a..488b133250c677edd34a98d3fd15ae9c8e5ae6ad 100755 (executable)
@@ -40,7 +40,8 @@
 #              CBC en-/decrypt CTR     XTS
 # POWER8[le]   3.96/0.72       0.74    1.1
 # POWER8[be]   3.75/0.65       0.66    1.0
-# POWER9[le]   3.05/0.65       0.65    0.80
+# POWER9[le]   4.02/0.86       0.84    1.05
+# POWER9[be]   3.99/0.78       0.79    0.97
 
 $flavour = shift;
 
index 350d5fae37daa5139c5eaf2fbf19b109b19681c8..6dd05819adaf289007b35ac0ece444232964a494 100755 (executable)
@@ -27,7 +27,7 @@
 # PPC970/G5            9.29/+160%      ?
 # POWER7               8.62/+61%       3.38
 # POWER8               8.70/+51%       3.36
-# POWER9               6.61/+29%       3.30(*)
+# POWER9               8.80/+29%       4.50(*)
 #
 # (*)  this is trade-off result, it's possible to improve it, but
 #      then it would negatively affect all others;
index 6df485efccafddaebf155464732c527f30bceb81..6a2ac712950b470ec07a6fd5c93b7ece8b232ada 100755 (executable)
@@ -30,7 +30,7 @@
 # 2x aggregated reduction improves performance by 50% (resulting
 # performance on POWER8 is 1 cycle per processed byte), and 4x
 # aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb).
-# POWER9 delivers 0.40 cpb.
+# POWER9 delivers 0.51 cpb.
 
 $flavour=shift;
 $output =shift;
index cb4ae23869bed9abe7842b2bd8c790b29dcd09f9..0c6d015d585bee046ab69cd12e0699f291aa8e5b 100755 (executable)
@@ -28,7 +28,7 @@
 # PPC970               7.00/+114%      3.51/+205%
 # POWER7               3.75/+260%      1.93/+100%
 # POWER8               -               2.03/+200%
-# POWER9               -               1.56/+150%
+# POWER9               -               2.00/+150%
 #
 # Do we need floating-point implementation for PPC? Results presented
 # in poly1305_ieee754.c are tricky to compare to, because they are for
index 2abb8e20b46e72c748e1b042ad668582b47bc5a7..09f8185848427441c10132813efab265e807b1d4 100755 (executable)
@@ -26,7 +26,6 @@
 # PPC970               6.03/+80%
 # POWER7               3.50/+30%
 # POWER8               3.75/+10%
-# POWER9               2.80/+12%
 
 $flavour = shift;
 
index 1a06e03558aed20fb34b7b0af9a89f8261e8bb53..995a02e5c139cdc10f730ad3159229d1dd680bf5 100644 (file)
@@ -38,7 +38,6 @@
  * POWER6               4.92
  * POWER7               4.50
  * POWER8               4.10
- * POWER9               3.14
  *
  * z10                  11.2
  * z196+                7.30
index bc1023e3997d34a9ee8ffd8aba6def26946d1101..30e70c5d6d7b9bf8fed6da6438d7930540800f37 100755 (executable)
@@ -30,7 +30,7 @@
 # PPC970/G5    14.6/+120%
 # POWER7       10.3/+100%
 # POWER8       11.5/+85%
-# POWER9       7.2/+45%
+# POWER9       9.4/+45%
 #
 # (*)  Corresponds to SHA3-256. Percentage after slash is improvement
 #      over gcc-4.x-generated KECCAK_1X_ALT code. Newer compilers do
index a0aeeb04123f756aaa9f5fff3c4be57c055c2fd0..de2bcd660a0926c42cc64ce6f850b15838ecdf2c 100755 (executable)
@@ -23,7 +23,7 @@
 # buffer for r=1088, which matches SHA3-256. This is 17% better than
 # scalar PPC64 code. It probably should be noted that if POWER8's
 # successor can achieve higher scalar instruction issue rate, then
-# this module will loose... And it does on POWER9 with 8.8 vs. 7.2.
+# this module will lose... And it does on POWER9 with 12.0 vs. 9.4.
 
 $flavour = shift;
 
index a33ae4dc458937bc92c1106aafda5b3ea0062aff..7a8d4358f0a80bd7fc54d0c8cfc405fa8b4e7d8f 100755 (executable)
@@ -37,8 +37,8 @@
 # build of sha512-ppc.pl, presented for reference.
 #
 #              POWER8          POWER9
-# SHA256       9.9 [15.8]      9.2 [9.3]
-# SHA512       6.3 [10.3]      5.8 [5.9]
+# SHA256       9.9 [15.8]      12.2 [12.5]
+# SHA512       6.3 [10.3]      7.7 [7.9]
 
 $flavour=shift;
 $output =shift;