x86_64 assembly pack: add Goldmont performance results.
author Andy Polyakov <appro@openssl.org>
Fri, 14 Oct 2016 11:25:06 +0000 (13:25 +0200)
committer Andy Polyakov <appro@openssl.org>
Mon, 24 Oct 2016 11:01:13 +0000 (13:01 +0200)
Reviewed-by: Richard Levitte <levitte@openssl.org>
crypto/aes/asm/aesni-x86_64.pl
crypto/aes/asm/bsaes-x86_64.pl
crypto/aes/asm/vpaes-x86_64.pl
crypto/chacha/asm/chacha-x86.pl
crypto/chacha/asm/chacha-x86_64.pl
crypto/modes/asm/ghash-x86_64.pl
crypto/poly1305/asm/poly1305-x86.pl
crypto/poly1305/asm/poly1305-x86_64.pl
crypto/sha/asm/sha1-x86_64.pl
crypto/sha/asm/sha512-586.pl
crypto/sha/asm/sha512-x86_64.pl

index 397e82f8c76d3159407e6e15c919f783910c7e37..443f2f7542f82c0be992f708c0c2d4be66b4e819 100644 (file)
 # Haswell      4.44/0.63       0.63    0.73    0.63    0.70
 # Skylake      2.62/0.63       0.63    0.63    0.63
 # Silvermont   5.75/3.54       3.56    4.12    3.87(*) 4.11
+# Goldmont     3.82/1.26       1.26    1.29    1.29    1.50
 # Bulldozer    5.77/0.70       0.72    0.90    0.70    0.95
 #
 # (*)  Atom Silvermont ECB result is suboptimal because of penalties
index d257a3efe6c60d7bcb48c0075a89699cb65d3c43..0d49947d81483d2d683e65b1483355e17c1cc27a 100644 (file)
@@ -48,6 +48,7 @@
 # Nehalem(**)  7.63            6.88            +11%
 # Atom         17.1            16.4            +4%
 # Silvermont   -               12.9
+# Goldmont     -               8.85
 #
 # (*)  Comparison is not completely fair, because "this" is ECB,
 #      i.e. no extra processing such as counter values calculation
@@ -87,6 +88,7 @@
 # Nehalem      7.80
 # Atom         17.9
 # Silvermont   14.0
+# Goldmont     10.2
 #
 # November 2011.
 #
index dd1f13a271859f513fea404b7dc39dad5f5115cf..b715aca167d170f6dc0224776c807e3bc789375a 100644 (file)
@@ -38,6 +38,7 @@
 # Nehalem      29.6/40.3/14.6          10.0/11.8
 # Atom         57.3/74.2/32.1          60.9/77.2(***)
 # Silvermont   52.7/64.0/19.5          48.8/60.8(***)
+# Goldmont     38.9/49.0/17.8          10.6/12.6
 #
 # (*)  "Hyper-threading" in the context refers rather to cache shared
 #      among multiple cores, than to specifically Intel HTT. As vast
index 3c6e67d9c8738ec84c51bed0b68928d54a31590c..f00b7d2935b47b6789e0990a57ddce0b88a41036 100755 (executable)
@@ -29,6 +29,7 @@
 # Sandy Bridge 10.5/+47%       3.20
 # Haswell      8.15/+50%       2.83
 # Silvermont   17.4/+36%       8.35
+# Goldmont     13.4/+40%       4.36
 # Sledgehammer 10.2/+54%
 # Bulldozer    13.4/+50%       4.38(*)
 #
index 4b1750cd5dea6dd41bd5896ad7e07856e9a94e9d..347dfcb3e578a35fb54981dd9880f17cd8d58d28 100755 (executable)
@@ -29,6 +29,7 @@
 # Ivy Bridge   6.71/+46%       5.40/6.49       2.41
 # Haswell      5.92/+43%       5.20/6.45       2.42        1.23
 # Silvermont   12.0/+33%       7.75/7.40       7.03(iii)
+# Goldmont     10.6/+17%       5.10/-          3.28
 # Sledgehammer 7.28/+52%       -/14.2(ii)      -
 # Bulldozer    9.66/+28%       9.85/11.1       3.06(iv)
 # VIA Nano     10.5/+46%       6.72/8.60       6.05
index 6941b3c234bd3c119dadcb4e906bd0ed4315e63a..c782edcbb84b76a4c373e97b6c858b99ccddb0db 100644 (file)
@@ -74,6 +74,7 @@
 # Skylake      0.44(+110%)(if system doesn't support AVX)
 # Bulldozer    1.49(+27%)
 # Silvermont   2.88(+13%)
+# Goldmont     1.08(+24%)
 
 # March 2013
 #
index ecc0ee62eaecb9a14bb21a7666d584b58713cd62..ab24dfcfaddaaa082f36382d30dd62adf777581f 100755 (executable)
@@ -30,6 +30,7 @@
 # Sandy Bridge 3.90/+100%      1.36
 # Haswell      3.88/+70%       1.18            0.72
 # Silvermont   11.0/+40%       4.80
+# Goldmont     4.10/+200%      2.10
 # VIA Nano     6.71/+90%       2.47
 # Sledgehammer 3.51/+180%      4.27
 # Bulldozer    4.53/+140%      1.31
index 784ff4b75837fb2f7b2b60328eb60eedaa90d3ef..4c22ded58024ba5c84fb5196db1e08e011c2151c 100755 (executable)
@@ -29,6 +29,7 @@
 # Haswell      1.14/+175%      1.11            0.65
 # Skylake      1.13/+120%      0.96            0.51
 # Silvermont   2.83/+95%       -
+# Goldmont     1.70/+180%      -
 # VIA Nano     1.82/+150%      -
 # Sledgehammer 1.38/+160%      -
 # Bulldozer    2.30/+130%      0.97
index 66054ceeae97d18f31a8ec324a726b70cfbba54d..f06fa515a29f815f524b1a66d6462f6612e9625a 100755 (executable)
 # VIA Nano     9.32            7.15/+30%
 # Atom         10.3            9.17/+12%
 # Silvermont   13.1(*)         9.37/+40%
+# Goldmont     8.13            6.42/+27%       1.70/+380%(**)
 #
 # (*)  obviously suboptimal result, nothing was done about it,
 #      because SSSE3 code is compiled unconditionally;
+# (**) SHAEXT result
 
 $flavour = shift;
 $output  = shift;
index 94cc0114f88a40d2843e018780cb6c07b3615f2c..448ac73e0604b7c6f86ac66e8b692e10c9ad40c5 100644 (file)
@@ -36,6 +36,7 @@
 # VIA Nano     91      -       52      33      14.7
 # Atom         126     -       68      48(***) 14.7
 # Silvermont   97      -       58      42(***) 17.5
+# Goldmont     80      -       48      19.5    12.0
 #
 # (*)  whichever best applicable.
 # (**) x86_64 assembler performance is presented for reference
index 01bbb7775f5d538c38b023751a16709197b3c901..5a1cbcf0ca8d953776b33d04e0e616067de561b8 100755 (executable)
@@ -98,8 +98,9 @@
 # VIA Nano     23.0    16.5(+39%)  -               14.7    -
 # Atom         23.0    18.9(+22%)  -               14.7    -
 # Silvermont   27.4    20.6(+33%)  -               17.5    -
+# Goldmont     18.9    14.3(+32%)  4.16(+350%)     12.0    -
 #
-# (*)  whichever best applicable;
+# (*)  whichever best applicable, including SHAEXT;
 # (**) switch from ror to shrd stands for fair share of improvement;
 # (***)        execution time is fully determined by remaining integer-only
 #      part, body_00_15; reducing the amount of SIMD instructions