aes/asm/aesni-sha*-x86_64.pl: add SHAEXT performance results.
author Andy Polyakov <appro@openssl.org>
Mon, 10 Jul 2017 13:21:00 +0000 (15:21 +0200)
committer Andy Polyakov <appro@openssl.org>
Mon, 24 Jul 2017 21:29:46 +0000 (23:29 +0200)
Reviewed-by: Kurt Roeckx <kurt@roeckx.be>
(Merged from https://github.com/openssl/openssl/pull/3898)

crypto/aes/asm/aesni-sha1-x86_64.pl
crypto/aes/asm/aesni-sha256-x86_64.pl

index de4c410..b01a4c5 100644 (file)
@@ -34,6 +34,8 @@
 # Haswell      4.43[+3.6(4.2)] 8.00(8.58)      4.55(5.21)  +75%(+65%)
 # Skylake      2.63[+3.5(4.1)] 6.17(6.69)      4.23(4.44)  +46%(+51%)
 # Bulldozer    5.77[+6.0]      11.72           6.37        +84%
+# Ryzen(**)    2.71[+1.93]     4.64            2.74        +69%
+# Goldmont(**) 3.82[+1.70]     5.52            4.20        +31%
 #
 #              AES-192-CBC
 # Westmere     4.51            9.81            6.80        +44%
 # Sandy Bridge 7.05            12.06(13.15)    7.12(7.72)  +69%(+70%)
 # Ivy Bridge   7.05            11.65           7.12        +64%
 # Haswell      6.19            9.76(10.34)     6.21(6.25)  +57%(+65%)
-# Skylake      3.62            7.16(7.68)      4.56(4.76)  +57%(+61$)
+# Skylake      3.62            7.16(7.68)      4.56(4.76)  +57%(+61%)
 # Bulldozer    8.00            13.95           8.25        +69%
+# Ryzen(**)    3.71            5.64            3.72        +52%
+# Goldmont(**) 5.35            7.05            5.76        +22%
 #
 # (*)  There are two code paths: SSSE3 and AVX. See sha1-568.pl for
 #      background information. Above numbers in parentheses are SSSE3
 #      results collected on AVX-capable CPU, i.e. apply on OSes that
 #      don't support AVX.
+# (**) SHAEXT results.
 #
 # Needless to mention that it makes no sense to implement "stitched"
 # *decrypt* subroutine. Because *both* AESNI-CBC decrypt and SHA1
index 74ec844..ef46023 100644 (file)
 # for standalone AESNI-CBC encrypt, standalone SHA256, and stitched
 # subroutine:
 #
-#               AES-128/-192/-256+SHA256       this(**)gain
-# Sandy Bridge     5.05/6.05/7.05+11.6         13.0    +28%/36%/43%
-# Ivy Bridge       5.05/6.05/7.05+10.3         11.6    +32%/41%/50%
-# Haswell          4.43/5.29/6.19+7.80         8.79    +39%/49%/59%
-# Skylake          2.62/3.14/3.62+7.70         8.10    +27%/34%/40%
-# Bulldozer        5.77/6.89/8.00+13.7         13.7    +42%/50%/58%
+#               AES-128/-192/-256+SHA256   this(**)    gain
+# Sandy Bridge     5.05/6.05/7.05+11.6     13.0        +28%/36%/43%
+# Ivy Bridge       5.05/6.05/7.05+10.3     11.6        +32%/41%/50%
+# Haswell          4.43/5.29/6.19+7.80     8.79        +39%/49%/59%
+# Skylake          2.62/3.14/3.62+7.70     8.10        +27%/34%/40%
+# Bulldozer        5.77/6.89/8.00+13.7     13.7        +42%/50%/58%
+# Ryzen(***)       2.71/-/3.71+2.05        2.74/-/3.73 +74%/-/54%
+# Goldmont(***)    3.82/-/5.35+4.16        4.73/-/5.94 +69%/-/60%
 #
 # (*)  there are XOP, AVX1 and AVX2 code paths, meaning that
 #      Westmere is omitted from loop, this is because gain was not
 #      estimated high enough to justify the effort;
 # (**) these are EVP-free results, results obtained with 'speed
 #      -evp aes-256-cbc-hmac-sha256' will vary by percent or two;
+# (***) these are SHAEXT results;
 
 $flavour = shift;
 $output  = shift;