x86_64 assembly pack: refine clang detection.

[openssl.git] / crypto / sha / asm / sha256-586.pl
diff --git a/crypto/sha/asm/sha256-586.pl b/crypto/sha/asm/sha256-586.pl

index 09648a8207c5871ae9502580b60517907f2a6a92..0c2a778e7cc914d7af8d5c78b9b96d3c6d6c1185 100644 (file)
--- a/crypto/sha/asm/sha256-586.pl
+++ b/crypto/sha/asm/sha256-586.pl
@@ -82,6 +82,12 @@ if ($xmm && !$avx && $ARGV[0] eq "win32" &&
         $avx = ($1>=10) + ($1>=11);
  }
  
+if ($xmm && !$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) {  # only when $avx is still unset: run $ENV{CC} -v (stderr merged in) and capture the clang/LLVM major.minor as $2
+       $avx = ($2>=3.0) + ($2>3.0);  # yields 1 for exactly 3.0 and 2 for anything newer; what each level enables is not visible here
+}  # NOTE(review): [3-9]\.[0-9]+ accepts only a single-digit major, and $2 is compared as a decimal (3.10 == 3.1) -- confirm this is intended
+
+$shaext=$xmm;  ### $shaext defaults to the $xmm setting; set to zero if compiling for 1.0.1
+
  $unroll_after = 64*4;  # If pre-evicted from L1P cache first spin of
                         # fully unrolled loop was measured to run about
                         # 3-4x slower. If slowdown coefficient is N and
@@ -205,8 +211,8 @@ sub BODY_00_15() {
         &jz     ($unroll_after?&label("no_xmm"):&label("loop"));
         &and    ("ecx",1<<30);          # mask "Intel CPU" bit
         &and    ("ebx",1<<28|1<<9);     # mask AVX and SSSE3 bits
-       &test   ("edx",1<<29)           if ($xmm);      # check for SHA
-       &jnz    (&label("shaext"))      if ($xmm);
+       &test   ("edx",1<<29)           if ($shaext);   # check for SHA
+       &jnz    (&label("shaext"))      if ($shaext);
         &or     ("ecx","ebx");
         &and    ("ecx",1<<28|1<<30);
         &cmp    ("ecx",1<<28|1<<30);
@@ -505,7 +511,7 @@ my @AH=($A,$K256);
  &function_end_A();
  }
                                                 if (!$i386 && $xmm) {{{
-{
+if ($shaext) {
  ######################################################################
  # Intel SHA Extensions implementation of SHA256 update function.
  #