x86_64 assembly pack: tune clang version detection even further.
[openssl.git] / crypto / ec / asm / ecp_nistz256-x86_64.pl
index cdff22af2c84bd222596ee4a524f0d9153674b53..c2621c2bc84d9a6899512f57b48667bf745cf6fc 100755 (executable)
 
 # Further optimization by <appro@openssl.org>:
 #
-#              this/original
-# Opteron      +12-49%
-# Bulldozer    +14-45%
-# P4           +18-46%
-# Westmere     +12-34%
-# Sandy Bridge +9-35%
-# Ivy Bridge   +9-35%
-# Haswell      +8-37%
-# Broadwell    +18-58%
-# Atom         +15-50%
-# VIA Nano     +43-160%
+#              this/original   with/without -DECP_NISTZ256_ASM(*)
+# Opteron      +12-49%         +110-150%
+# Bulldozer    +14-45%         +175-210%
+# P4           +18-46%         n/a :-(
+# Westmere     +12-34%         +80-87%
+# Sandy Bridge +9-35%          +110-120%
+# Ivy Bridge   +9-35%          +110-125%
+# Haswell      +8-37%          +140-160%
+# Broadwell    +18-58%         +145-210%
+# Atom         +15-50%         +130-180%
+# VIA Nano     +43-160%        +300-480%
+#
+# (*)  "without -DECP_NISTZ256_ASM" refers to build with
+#      "enable-ec_nistp_64_gcc_128";
 #
 # Ranges denote minimum and maximum improvement coefficients depending
-# on benchmark. Lower coefficients are for ECDSA sign, relatively
-# fastest server-side operation.
+# on benchmark. Lower coefficients are for ECDSA sign, relatively fastest
+# server-side operation. Keep in mind that +100% means 2x improvement.
 
 $flavour = shift;
 $output  = shift;
@@ -78,7 +81,7 @@ if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
        $addx = ($1>=12);
 }
 
-if (!$addx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) {
+if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9])\.([0-9]+)/) {
        my $ver = $2 + $3/100.0;        # 3.1->3.01, 3.10->3.10
        $avx = ($ver>=3.0) + ($ver>=3.01);
        $addx = ($ver>=3.03);