x86[_64] assembly pack: add Silvermont performance data.

author Andy Polyakov <appro@openssl.org>

Sat, 30 Aug 2014 17:13:49 +0000 (19:13 +0200)

committer Andy Polyakov <appro@openssl.org>

Sat, 30 Aug 2014 17:14:49 +0000 (19:14 +0200)
author Andy Polyakov <appro@openssl.org>
Sat, 30 Aug 2014 17:13:49 +0000 (19:13 +0200)
committer Andy Polyakov <appro@openssl.org>
Sat, 30 Aug 2014 17:14:49 +0000 (19:14 +0200)
diff --git a/crypto/aes/asm/bsaes-x86_64.pl b/crypto/aes/asm/bsaes-x86_64.pl

index d2c3978b962e451f0e901f0e578138813c5ef08c..3f7d33c45bce7154a54789eb6c1a6902b17ad3fa 100644 (file)
--- a/crypto/aes/asm/bsaes-x86_64.pl
+++ b/crypto/aes/asm/bsaes-x86_64.pl
@@ -40,6 +40,7 @@
  # Core 2       9.30            8.69            +7%
  # Nehalem(**)  7.63            6.88            +11%
  # Atom         17.1            16.4            +4%
+# Silvermont   -               12.9
  #
  # (*)  Comparison is not completely fair, because "this" is ECB,
  #      i.e. no extra processing such as counter values calculation
@@ -78,6 +79,7 @@
  # Core 2       9.98
  # Nehalem      7.80
  # Atom         17.9
+# Silvermont   14.0
  #
  # November 2011.
  #
diff --git a/crypto/aes/asm/vpaes-x86.pl b/crypto/aes/asm/vpaes-x86.pl

index bacf42cf0f7fb0d3439029a6e18efd9467b4cd4e..2ba149c3f9d599ce87516672ab9cea657395b4ef 100644 (file)
--- a/crypto/aes/asm/vpaes-x86.pl
+++ b/crypto/aes/asm/vpaes-x86.pl
@@ -30,6 +30,7 @@
  # Core 2(**)   28.1/41.4/18.3          21.9/25.2(***)
  # Nehalem      27.9/40.4/18.1          10.2/11.9
  # Atom         70.7/92.1/60.1          61.1/75.4(***)
+# Silvermont   45.4/62.9/24.1          49.2/61.1(***)
  #
  # (*)  "Hyper-threading" in the context refers rather to cache shared
  #      among multiple cores, than to specifically Intel HTT. As vast
diff --git a/crypto/aes/asm/vpaes-x86_64.pl b/crypto/aes/asm/vpaes-x86_64.pl

index 40ef342d97b5f3fed725854ec9d45c2ff553d77f..f2ef318fae4e131c68a97f56ad039218e48e1030 100644 (file)
--- a/crypto/aes/asm/vpaes-x86_64.pl
+++ b/crypto/aes/asm/vpaes-x86_64.pl
@@ -30,6 +30,7 @@
  # Core 2(**)   29.6/41.1/14.3          21.9/25.2(***)
  # Nehalem      29.6/40.3/14.6          10.0/11.8
  # Atom         57.3/74.2/32.1          60.9/77.2(***)
+# Silvermont   52.7/64.0/19.5          48.8/60.8(***)
  #
  # (*)  "Hyper-threading" in the context refers rather to cache shared
  #      among multiple cores, than to specifically Intel HTT. As vast
diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl

index 1e7922733821b581c730b887f4bd5ce59c75d974..ce7d1cb8baca6a6988aff9400d1f42240ab7de22 100644 (file)
--- a/crypto/modes/asm/ghash-x86_64.pl
+++ b/crypto/modes/asm/ghash-x86_64.pl
@@ -64,6 +64,7 @@
  # Ivy Bridge   1.80(+7%)
  # Haswell      0.55(+93%) (if system doesn't support AVX)
  # Bulldozer    1.49(+27%)
+# Silvermont   2.88(+13%)
  
  # March 2013
  #
diff --git a/crypto/sha/asm/sha1-586.pl b/crypto/sha/asm/sha1-586.pl

index 59da867848237930c0548054b2d8af76049ac449..8377299b1e3383f68896d53b69993fc0a3a1e31c 100644 (file)
--- a/crypto/sha/asm/sha1-586.pl
+++ b/crypto/sha/asm/sha1-586.pl
@@ -93,16 +93,19 @@
  # P4           10.6            -
  # AMD K8       7.1             -
  # Core2                7.3             6.0/+22%        -
-# Atom         12.5            9.3(*)/+35%     -
  # Westmere     7.3             5.5/+33%        -
  # Sandy Bridge 8.8             6.2/+40%        5.1(**)/+73%
  # Ivy Bridge   7.2             4.8/+51%        4.7(**)/+53%
  # Haswell      6.5             4.3/+51%        4.1(**)/+58%
  # Bulldozer    11.6            6.0/+92%
  # VIA Nano     10.6            7.5/+41%
+# Atom         12.5            9.3(*)/+35%
+# Silvermont   14.5            9.9(*)/+46%
  #
  # (*)  Loop is 1056 instructions long and expected result is ~8.25.
-#      It remains mystery [to me] why ILP is limited to 1.7.
+#      The discrepancy is due to front-end limitations, so-called
+#      MS-ROM penalties, and on Silvermont also to the limited
+#      parallelism of rotate instructions.
  #
  # (**) As per above comment, the result is for AVX *plus* sh[rl]d.
  
diff --git a/crypto/sha/asm/sha256-586.pl b/crypto/sha/asm/sha256-586.pl

index 0c2a778e7cc914d7af8d5c78b9b96d3c6d6c1185..6462e45ba75bee72aa6f85f2d8843b67db5ee2d2 100644 (file)
--- a/crypto/sha/asm/sha256-586.pl
+++ b/crypto/sha/asm/sha256-586.pl
@@ -53,6 +53,7 @@
  # Bulldozer    36      -       27/22           17.0    13.6
  # VIA Nano     36      -       25/22           16.8    16.5
  # Atom         50      -       30/25           21.9    18.9
+# Silvermont   40      -       34/31           22.9    20.6
  #
  # (*)  numbers after slash are for unrolled loop, where applicable;
  # (**) x86_64 assembly performance is presented for reference
diff --git a/crypto/sha/asm/sha512-586.pl b/crypto/sha/asm/sha512-586.pl

index 9fc792964f48c69286c53c335cc894c437c4492c..e96ec00314a486766e9f1ab1f813d3dadfe9dae6 100644 (file)
--- a/crypto/sha/asm/sha512-586.pl
+++ b/crypto/sha/asm/sha512-586.pl
@@ -28,6 +28,7 @@
  # Bulldozer    121     -       50      14.0    13.5
  # VIA Nano     91      -       52      33      14.7
  # Atom         126     -       68      48(***) 14.7
+# Silvermont   97      -       58      42(***) 17.5
  #
  # (*)  whichever best applicable.
  # (**) x86_64 assembler performance is presented for reference
diff --git a/crypto/sha/asm/sha512-x86_64.pl b/crypto/sha/asm/sha512-x86_64.pl

index 476e99fee8f4d6508a9e9f059d11cb7c1c258c4d..b7b44b4411362db25c58b9d2bac80e0782885ceb 100755 (executable)
--- a/crypto/sha/asm/sha512-x86_64.pl
+++ b/crypto/sha/asm/sha512-x86_64.pl
@@ -89,6 +89,7 @@
  # Bulldozer    21.1    13.6(+54%)  13.6(+54%(***)) 13.5    8.58(+57%)
  # VIA Nano     23.0    16.5(+39%)  -               14.7    -
  # Atom         23.0    18.9(+22%)  -               14.7    -
+# Silvermont   27.4    20.6(+33%)  -               17.5    -
  #
  # (*)  whichever best applicable;
  # (**) switch from ror to shrd stands for fair share of improvement;
author	Andy Polyakov <appro@openssl.org>
	Sat, 30 Aug 2014 17:13:49 +0000 (19:13 +0200)
committer	Andy Polyakov <appro@openssl.org>
	Sat, 30 Aug 2014 17:14:49 +0000 (19:14 +0200)
crypto/aes/asm/bsaes-x86_64.pl		patch | blob | history
crypto/aes/asm/vpaes-x86.pl		patch | blob | history
crypto/aes/asm/vpaes-x86_64.pl		patch | blob | history
crypto/modes/asm/ghash-x86_64.pl		patch | blob | history
crypto/sha/asm/sha1-586.pl		patch | blob | history
crypto/sha/asm/sha256-586.pl		patch | blob | history
crypto/sha/asm/sha512-586.pl		patch | blob | history
crypto/sha/asm/sha512-x86_64.pl		patch | blob | history