x86 assembly pack: update performance results.
author Andy Polyakov <appro@openssl.org>
Sat, 17 Dec 2016 18:10:00 +0000 (19:10 +0100)
committer Andy Polyakov <appro@openssl.org>
Mon, 19 Dec 2016 15:18:25 +0000 (16:18 +0100)
Reviewed-by: Richard Levitte <levitte@openssl.org>
crypto/aes/asm/aesni-x86.pl
crypto/chacha/asm/chacha-x86.pl
crypto/chacha/asm/chacha-x86_64.pl
crypto/poly1305/asm/poly1305-x86.pl
crypto/sha/asm/sha1-586.pl
crypto/sha/asm/sha256-586.pl
crypto/sha/asm/sha512-586.pl

index c34d9bf4afec24d1b45c8a48b140c451e27f5774..b1eca6351937e9cb32d001eacca84b03af52c294 100644 (file)
@@ -62,7 +62,9 @@
 # Westmere     3.77/1.37       1.37    1.52    1.27
 # * Bridge     5.07/0.98       0.99    1.09    0.91    1.10
 # Haswell      4.44/0.80       0.97    1.03    0.72    0.76
+# Skylake      2.68/0.65       0.65    0.66    0.64    0.66
 # Silvermont   5.77/3.56       3.67    4.03    3.46    4.03
+# Goldmont     3.84/1.39       1.39    1.63    1.31    1.70
 # Bulldozer    5.80/0.98       1.05    1.24    0.93    1.23
 
 $PREFIX="aesni";       # if $PREFIX is set to "AES", the script
index 61b328612b7cf147380b65e0ffa6002cb7dba9b1..d606db8a61b0b3ed21bed836e999e9bdb0a4f703 100755 (executable)
@@ -28,6 +28,7 @@
 # Westmere     9.50/+45%       3.35
 # Sandy Bridge 10.5/+47%       3.20
 # Haswell      8.15/+50%       2.83
+# Skylake      7.53/+22%       2.75
 # Silvermont   17.4/+36%       8.35
 # Goldmont     13.4/+40%       4.36
 # Sledgehammer 10.2/+54%
index a32d3dc53b49bb9dd8682800c64e965e03f9e25d..fd3fdeb10c7572bd0dc6e1b99fad24488719cab0 100755 (executable)
@@ -32,6 +32,7 @@
 # Sandy Bridge 8.31/+42%       5.45/6.76       2.72
 # Ivy Bridge   6.71/+46%       5.40/6.49       2.41
 # Haswell      5.92/+43%       5.20/6.45       2.42        1.23
+# Skylake      5.87/+39%       4.70/-          2.31        1.19
 # Silvermont   12.0/+33%       7.75/7.40       7.03(iii)
 # Goldmont     10.6/+17%       5.10/-          3.28
 # Sledgehammer 7.28/+52%       -/14.2(ii)      -
index ab24dfcfaddaaa082f36382d30dd62adf777581f..9db38b5ecc4b60ce79462ad1a2c070ed50d47dd0 100755 (executable)
@@ -29,6 +29,7 @@
 # Westmere     4.58/+100%      1.43
 # Sandy Bridge 3.90/+100%      1.36
 # Haswell      3.88/+70%       1.18            0.72
+# Skylake      3.10/+60%       1.14            0.62
 # Silvermont   11.0/+40%       4.80
 # Goldmont     4.10/+200%      2.10
 # VIA Nano     6.71/+90%       2.47
index 3bf8200dbb5c109219d43af834aebcd78deeb741..c753ed30797166cf096ab67b6e27241b332a713d 100644 (file)
 # Sandy Bridge 8.8             6.2/+40%        5.1(**)/+73%
 # Ivy Bridge   7.2             4.8/+51%        4.7(**)/+53%
 # Haswell      6.5             4.3/+51%        4.1(**)/+58%
+# Skylake      6.4             4.1/+55%        4.1(**)/+55%
 # Bulldozer    11.6            6.0/+92%
 # VIA Nano     10.6            7.5/+41%
 # Atom         12.5            9.3(*)/+35%
 # Silvermont   14.5            9.9(*)/+46%
+# Goldmont     8.8             6.7/+30%        1.7(***)/+415%
 #
 # (*)  Loop is 1056 instructions long and expected result is ~8.25.
 #      The discrepancy is because of front-end limitations, so
 #      limited parallelism.
 #
 # (**) As per above comment, the result is for AVX *plus* sh[rl]d.
+#
+# (***)        SHAEXT result
 
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 push(@INC,"${dir}","${dir}../../perlasm");
index 8e7f4eecc3c802147b93f3d132033f12d40ac02d..705de2a648c3d667f40a0106896c635471610de9 100644 (file)
 # Sandy Bridge 25      -       15.9            12.4    11.6
 # Ivy Bridge   24      -       15.0            11.4    10.3
 # Haswell      22      -       13.9            9.46    7.80
+# Skylake      20      -       14.9            9.50    7.70
 # Bulldozer    36      -       27/22           17.0    13.6
 # VIA Nano     36      -       25/22           16.8    16.5
 # Atom         50      -       30/25           21.9    18.9
 # Silvermont   40      -       34/31           22.9    20.6
+# Goldmont     29      -       20              16.3(***)
 #
 # (*)  numbers after slash are for unrolled loop, where applicable;
 # (**) x86_64 assembly performance is presented for reference
 #      purposes, results are best-available;
+# (***)        SHAEXT result is 4.1, strangely enough better than 64-bit one;
 
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 push(@INC,"${dir}","${dir}../../perlasm");
index 448ac73e0604b7c6f86ac66e8b692e10c9ad40c5..94017779a768917dfd01ae79f4dccdf31516c112 100644 (file)
@@ -32,6 +32,7 @@
 # Sandy Bridge 58      -       35      11.9    11.2
 # Ivy Bridge   50      -       33      11.5    8.17
 # Haswell      46      -       29      11.3    7.66
+# Skylake      40      -       26      13.3    7.25
 # Bulldozer    121     -       50      14.0    13.5
 # VIA Nano     91      -       52      33      14.7
 # Atom         126     -       68      48(***) 14.7