PPC assembly pack: add POWER9 results.
author Andy Polyakov <appro@openssl.org>
Wed, 9 May 2018 10:24:05 +0000 (12:24 +0200)
committer Andy Polyakov <appro@openssl.org>
Thu, 10 May 2018 09:44:21 +0000 (11:44 +0200)
Reviewed-by: Rich Salz <rsalz@openssl.org>
crypto/aes/asm/aesp8-ppc.pl
crypto/chacha/asm/chacha-ppc.pl
crypto/modes/asm/ghashp8-ppc.pl
crypto/poly1305/asm/poly1305-ppc.pl
crypto/poly1305/asm/poly1305-ppcfp.pl
crypto/poly1305/poly1305_ieee754.c
crypto/sha/asm/keccak1600-ppc64.pl
crypto/sha/asm/keccak1600p8-ppc.pl
crypto/sha/asm/sha512p8-ppc.pl

index 7463df6..e1be23a 100755 (executable)
@@ -40,6 +40,7 @@
 #              CBC en-/decrypt CTR     XTS
 # POWER8[le]   3.96/0.72       0.74    1.1
 # POWER8[be]   3.75/0.65       0.66    1.0
+# POWER9[le]   3.05/0.65       0.65    0.80
 
 $flavour = shift;
 
index f972ee4..af2f037 100755 (executable)
@@ -27,6 +27,7 @@
 # PPC970/G5            9.29/+160%      4.60
 # POWER7               8.62/+61%       4.27
 # POWER8               8.70/+51%       3.96
+# POWER9               6.61/+29%       3.67
 
 $flavour = shift;
 
index 45c6438..a1d5789 100755 (executable)
@@ -30,6 +30,7 @@
 # 2x aggregated reduction improves performance by 50% (resulting
 # performance on POWER8 is 1 cycle per processed byte), and 4x
 # aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb).
+# POWER9 delivers 0.40 cpb.
 
 $flavour=shift;
 $output =shift;
index ab65910..8e105d7 100755 (executable)
@@ -28,6 +28,7 @@
 # PPC970               7.00/+114%      3.51/+205%
 # POWER7               3.75/+260%      1.93/+100%
 # POWER8               -               2.03/+200%
+# POWER9               -               1.56/+150%
 #
 # Do we need floating-point implementation for PPC? Results presented
 # in poly1305_ieee754.c are tricky to compare to, because they are for
index 49f70a8..fc62baa 100755 (executable)
@@ -26,6 +26,7 @@
 # PPC970               6.03/+80%
 # POWER7               3.50/+30%
 # POWER8               3.75/+10%
+# POWER9               2.80/+12%
 
 $flavour = shift;
 
index 995a02e..1a06e03 100644 (file)
@@ -38,6 +38,7 @@
  * POWER6               4.92
  * POWER7               4.50
  * POWER8               4.10
+ * POWER9               3.14
  *
  * z10                  11.2
  * z196+                7.30
index f89f71c..60ed2f2 100755 (executable)
@@ -30,6 +30,7 @@
 # PPC970/G5    14.6/+120%
 # POWER7       10.3/+100%
 # POWER8       11.5/+85%
+# POWER9       7.2/+45%
 #
 # (*)  Corresponds to SHA3-256. Percentage after slash is improvement
 #      over gcc-4.x-generated KECCAK_1X_ALT code. Newer compilers do
index feec688..95e6242 100755 (executable)
@@ -23,7 +23,7 @@
 # buffer for r=1088, which matches SHA3-256. This is 17% better than
 # scalar PPC64 code. It probably should be noted that if POWER8's
 # successor can achieve higher scalar instruction issue rate, then
-# this module will loose...
+# this module will lose... And it does on POWER9 with 8.8 vs. 7.2.
 
 $flavour = shift;
 
index 93dfef2..e6e9467 100755 (executable)
@@ -36,9 +36,9 @@
 # little-endian system]. Numbers in square brackets are for 64-bit
 # build of sha512-ppc.pl, presented for reference.
 #
-#              POWER8
-# SHA256       9.9 [15.8]
-# SHA512       6.3 [10.3]
+#              POWER8          POWER9
+# SHA256       9.9 [15.8]      9.2 [9.3]
+# SHA512       6.3 [10.3]      5.8 [5.9]
 
 $flavour=shift;
 $output =shift;