Skylake performance results.
author: Andy Polyakov <appro@openssl.org>
Fri, 25 Sep 2015 12:00:46 +0000 (14:00 +0200)
committer: Andy Polyakov <appro@openssl.org>
Sat, 26 Sep 2015 17:50:11 +0000 (19:50 +0200)
Reviewed-by: Matt Caswell <matt@openssl.org>
crypto/aes/asm/aesni-sha1-x86_64.pl
crypto/aes/asm/aesni-sha256-x86_64.pl
crypto/aes/asm/aesni-x86_64.pl
crypto/modes/asm/aesni-gcm-x86_64.pl
crypto/modes/asm/ghash-x86_64.pl
crypto/rc4/asm/rc4-md5-x86_64.pl
crypto/sha/asm/sha1-mb-x86_64.pl
crypto/sha/asm/sha1-x86_64.pl
crypto/sha/asm/sha256-mb-x86_64.pl
crypto/sha/asm/sha512-x86_64.pl

index 97992adca7c3480342e835788db88e886caf6e41..952b4cfdd668297ead9176cbc5ee68f3b3380fee 100644 (file)
@@ -25,6 +25,7 @@
 # Sandy Bridge 5.05[+5.0(6.1)] 10.06(11.15)    5.98(7.05)  +68%(+58%)
 # Ivy Bridge   5.05[+4.6]      9.65            5.54        +74%
 # Haswell      4.43[+3.6(4.2)] 8.00(8.58)      4.55(5.21)  +75%(+65%)
 # Sandy Bridge 5.05[+5.0(6.1)] 10.06(11.15)    5.98(7.05)  +68%(+58%)
 # Ivy Bridge   5.05[+4.6]      9.65            5.54        +74%
 # Haswell      4.43[+3.6(4.2)] 8.00(8.58)      4.55(5.21)  +75%(+65%)
+# Skylake      2.63[+3.5(4.1)] 6.17(6.69)      4.23(4.44)  +46%(+51%)
 # Bulldozer    5.77[+6.0]      11.72           6.37        +84%
 #
 #              AES-192-CBC
 # Bulldozer    5.77[+6.0]      11.72           6.37        +84%
 #
 #              AES-192-CBC
@@ -39,6 +40,7 @@
 # Sandy Bridge 7.05            12.06(13.15)    7.12(7.72)  +69%(+70%)
 # Ivy Bridge   7.05            11.65           7.12        +64%
 # Haswell      6.19            9.76(10.34)     6.21(6.25)  +57%(+65%)
 # Sandy Bridge 7.05            12.06(13.15)    7.12(7.72)  +69%(+70%)
 # Ivy Bridge   7.05            11.65           7.12        +64%
 # Haswell      6.19            9.76(10.34)     6.21(6.25)  +57%(+65%)
+# Skylake      3.62            7.16(7.68)      4.56(4.76)  +57%(+61%)
 # Bulldozer    8.00            13.95           8.25        +69%
 #
 # (*)  There are two code paths: SSSE3 and AVX. See sha1-568.pl for
 # Bulldozer    8.00            13.95           8.25        +69%
 #
 # (*)  There are two code paths: SSSE3 and AVX. See sha1-568.pl for
index 19b0433b3b1bdbac254cc6c2af30bd548f0c5442..74dad44408abde019d6299206116767124abf9f7 100644 (file)
@@ -25,6 +25,7 @@
 # Sandy Bridge     5.05/6.05/7.05+11.6         13.0    +28%/36%/43%
 # Ivy Bridge       5.05/6.05/7.05+10.3         11.6    +32%/41%/50%
 # Haswell          4.43/5.29/6.19+7.80         8.79    +39%/49%/59%
 # Sandy Bridge     5.05/6.05/7.05+11.6         13.0    +28%/36%/43%
 # Ivy Bridge       5.05/6.05/7.05+10.3         11.6    +32%/41%/50%
 # Haswell          4.43/5.29/6.19+7.80         8.79    +39%/49%/59%
+# Skylake          2.62/3.14/3.62+7.70         8.10    +27%/34%/40%
 # Bulldozer        5.77/6.89/8.00+13.7         13.7    +42%/50%/58%
 #
 # (*)  there are XOP, AVX1 and AVX2 code paths, meaning that
 # Bulldozer        5.77/6.89/8.00+13.7         13.7    +42%/50%/58%
 #
 # (*)  there are XOP, AVX1 and AVX2 code paths, meaning that
index 25ca574f6a2f90b04dccc0a7726df86772bcb996..6037e9e76e3f9ae7afd8c9e33c3ffe02b792d89e 100644 (file)
 # Westmere     3.77/1.25       1.25    1.25    1.26
 # * Bridge     5.07/0.74       0.75    0.90    0.85
 # Haswell      4.44/0.63       0.63    0.73    0.63
 # Westmere     3.77/1.25       1.25    1.25    1.26
 # * Bridge     5.07/0.74       0.75    0.90    0.85
 # Haswell      4.44/0.63       0.63    0.73    0.63
+# Skylake      2.62/0.63       0.63    0.63    0.63
 # Silvermont   5.75/3.54       3.56    4.12    3.87(*)
 # Bulldozer    5.77/0.70       0.72    0.90    0.70
 #
 # Silvermont   5.75/3.54       3.56    4.12    3.87(*)
 # Bulldozer    5.77/0.70       0.72    0.90    0.70
 #
index 7e4e04ea25300d2bae95694110773cb5d8d359f3..608c3f780577db0185a234048ee0bd4c78c648bc 100644 (file)
 # [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max
 # Locktyukhin of Intel Corp. who verified that it reduces shuffles
 # pressure with notable relative improvement, achieving 1.0 cycle per
 # [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max
 # Locktyukhin of Intel Corp. who verified that it reduces shuffles
 # pressure with notable relative improvement, achieving 1.0 cycle per
-# byte processed with 128-bit key on Haswell processor, and 0.74 -
-# on Broadwell. [Mentioned results are raw profiled measurements for
-# favourable packet size, one divisible by 96. Applications using the
-# EVP interface will observe a few percent worse performance.]
+# byte processed with 128-bit key on Haswell processor, 0.74 - on
+# Broadwell, 0.63 - on Skylake... [Mentioned results are raw profiled
+# measurements for favourable packet size, one divisible by 96.
+# Applications using the EVP interface will observe a few percent
+# worse performance.]
 #
 # [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
 # [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf
 #
 # [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
 # [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf
index 5a7ce39486fe9e4de1ab254abef3a445d36886d0..a63c923429f32e1794c482b53445636b3dfcd948 100644 (file)
@@ -64,6 +64,7 @@
 # Ivy Bridge   1.80(+7%)
 # Haswell      0.55(+93%) (if system doesn't support AVX)
 # Broadwell    0.45(+110%)(if system doesn't support AVX)
 # Ivy Bridge   1.80(+7%)
 # Haswell      0.55(+93%) (if system doesn't support AVX)
 # Broadwell    0.45(+110%)(if system doesn't support AVX)
+# Skylake      0.44(+110%)(if system doesn't support AVX)
 # Bulldozer    1.49(+27%)
 # Silvermont   2.88(+13%)
 
 # Bulldozer    1.49(+27%)
 # Silvermont   2.88(+13%)
 
@@ -74,8 +75,8 @@
 # CPUs such as Sandy and Ivy Bridge can execute it, the code performs
 # sub-optimally in comparison to above mentioned version. But thanks
 # to Ilya Albrekht and Max Locktyukhin of Intel Corp. we knew that
 # CPUs such as Sandy and Ivy Bridge can execute it, the code performs
 # sub-optimally in comparison to above mentioned version. But thanks
 # to Ilya Albrekht and Max Locktyukhin of Intel Corp. we knew that
-# it performs in 0.41 cycles per byte on Haswell processor, and in
-# 0.29 on Broadwell.
+# it performs in 0.41 cycles per byte on Haswell processor, in
+# 0.29 on Broadwell, and in 0.36 on Skylake.
 #
 # [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
 
 #
 # [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
 
index 49d5438457c4cdf81345dd7dd398318054269470..a92fe92269fd57d53cd87d10ddec220068b81ab7 100644 (file)
 # Core2                6.5     5.8     12.3    7.7     +60%
 # Westmere     4.3     5.2     9.5     7.0     +36%
 # Sandy Bridge 4.2     5.5     9.7     6.8     +43%
 # Core2                6.5     5.8     12.3    7.7     +60%
 # Westmere     4.3     5.2     9.5     7.0     +36%
 # Sandy Bridge 4.2     5.5     9.7     6.8     +43%
+# Ivy Bridge   4.1     5.2     9.3     6.0     +54%
+# Haswell      4.0     5.0     9.0     5.7     +60%
+# Skylake      6.3(**) 5.0     11.3    5.3     +110%
 # Atom         9.3     6.5     15.8    11.1    +42%
 # VIA Nano     6.3     5.4     11.7    8.6     +37%
 # Atom         9.3     6.5     15.8    11.1    +42%
 # VIA Nano     6.3     5.4     11.7    8.6     +37%
-# Ivy Bridge   4.1     5.2     9.3     6.0     +54%
 # Bulldozer    4.5     5.4     9.9     7.7     +29%
 #
 # (*)  rc4-x86_64.pl delivers 5.3 on Opteron, so real improvement
 #      is +53%...
 # Bulldozer    4.5     5.4     9.9     7.7     +29%
 #
 # (*)  rc4-x86_64.pl delivers 5.3 on Opteron, so real improvement
 #      is +53%...
+# (**) unidentified anomaly;
 
 my ($rc4,$md5)=(1,1);  # what to generate?
 my $D="#" if (!$md5);  # if set to "#", MD5 is stitched into RC4(),
 
 my ($rc4,$md5)=(1,1);  # what to generate?
 my $D="#" if (!$md5);  # if set to "#", MD5 is stitched into RC4(),
index a8ee075eaaa0a037a599aca20362172d8fdbd644..099c803ebc41e76ef6c90aeb209d31fd1687e6c7 100644 (file)
@@ -19,6 +19,7 @@
 # Sandy Bridge (8.16   +5.15=13.3)/n   4.99    5.98            +80%
 # Ivy Bridge   (8.08   +5.14=13.2)/n   4.60    5.54            +68%
 # Haswell(iii) (8.96   +5.00=14.0)/n   3.57    4.55            +160%
 # Sandy Bridge (8.16   +5.15=13.3)/n   4.99    5.98            +80%
 # Ivy Bridge   (8.08   +5.14=13.2)/n   4.60    5.54            +68%
 # Haswell(iii) (8.96   +5.00=14.0)/n   3.57    4.55            +160%
+# Skylake      (8.70   +5.00=13.7)/n   3.64    4.20            +145%
 # Bulldozer    (9.76   +5.76=15.5)/n   5.95    6.37            +64%
 #
 # (i)  multi-block CBC encrypt with 128-bit key;
 # Bulldozer    (9.76   +5.76=15.5)/n   5.95    6.37            +64%
 #
 # (i)  multi-block CBC encrypt with 128-bit key;
index 9bb6b498190fdf81f7de9ca1d6429191915ca59b..d6b0722f31af0ddc16ab2758cb88f36da412d118 100755 (executable)
@@ -73,6 +73,7 @@
 # Sandy Bridge 7.70            6.10/+26%       4.99/+54%
 # Ivy Bridge   6.06            4.67/+30%       4.60/+32%
 # Haswell      5.45            4.15/+31%       3.57/+53%
 # Sandy Bridge 7.70            6.10/+26%       4.99/+54%
 # Ivy Bridge   6.06            4.67/+30%       4.60/+32%
 # Haswell      5.45            4.15/+31%       3.57/+53%
+# Skylake      5.18            4.06/+28%       3.54/+46%
 # Bulldozer    9.11            5.95/+53%
 # VIA Nano     9.32            7.15/+30%
 # Atom         10.3            9.17/+12%
 # Bulldozer    9.11            5.95/+53%
 # VIA Nano     9.32            7.15/+30%
 # Atom         10.3            9.17/+12%
index adf2ddccd18b0c0fcc638580ff9695ee51cfb3c7..ab0f028bef0a58e7412de425d921235cc3ccb4a5 100644 (file)
@@ -19,6 +19,7 @@
 # Sandy Bridge (20.5   +5.15=25.7)/n   11.6    13.0            +103%
 # Ivy Bridge   (20.4   +5.14=25.5)/n   10.3    11.6            +82%
 # Haswell(iii) (21.0   +5.00=26.0)/n   7.80    8.79            +170%
 # Sandy Bridge (20.5   +5.15=25.7)/n   11.6    13.0            +103%
 # Ivy Bridge   (20.4   +5.14=25.5)/n   10.3    11.6            +82%
 # Haswell(iii) (21.0   +5.00=26.0)/n   7.80    8.79            +170%
+# Skylake      (18.9   +5.00=23.9)/n   7.70    8.17            +170%
 # Bulldozer    (21.6   +5.76=27.4)/n   13.6    13.7            +100%
 #
 # (i)  multi-block CBC encrypt with 128-bit key;
 # Bulldozer    (21.6   +5.76=27.4)/n   13.6    13.7            +100%
 #
 # (i)  multi-block CBC encrypt with 128-bit key;
index b7b44b4411362db25c58b9d2bac80e0782885ceb..04ab412398fbe0b90fe13193cba61cf94a2411f3 100755 (executable)
@@ -86,6 +86,7 @@
 # Sandy Bridge 17.4    14.2(+23%)  11.6(+50%(**))  11.2    8.10(+38%(**))
 # Ivy Bridge   12.6    10.5(+20%)  10.3(+22%)      8.17    7.22(+13%)
 # Haswell      12.2    9.28(+31%)  7.80(+56%)      7.66    5.40(+42%)
 # Sandy Bridge 17.4    14.2(+23%)  11.6(+50%(**))  11.2    8.10(+38%(**))
 # Ivy Bridge   12.6    10.5(+20%)  10.3(+22%)      8.17    7.22(+13%)
 # Haswell      12.2    9.28(+31%)  7.80(+56%)      7.66    5.40(+42%)
+# Skylake      11.4    9.03(+26%)  7.70(+48%)      7.25    5.20(+40%)
 # Bulldozer    21.1    13.6(+54%)  13.6(+54%(***)) 13.5    8.58(+57%)
 # VIA Nano     23.0    16.5(+39%)  -               14.7    -
 # Atom         23.0    18.9(+22%)  -               14.7    -
 # Bulldozer    21.1    13.6(+54%)  13.6(+54%(***)) 13.5    8.58(+57%)
 # VIA Nano     23.0    16.5(+39%)  -               14.7    -
 # Atom         23.0    18.9(+22%)  -               14.7    -