X-Git-Url: https://git.openssl.org/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fmodes%2Fasm%2Faesni-gcm-x86_64.pl;h=608c3f780577db0185a234048ee0bd4c78c648bc;hp=3781933917227dd127352fa8d7d066804dacba13;hb=b7f5503fa6e1feebec2ac12b8ddcb5b5672452a6;hpb=7a1a12232a84621271bf808107f3be9a2df5121a

diff --git a/crypto/modes/asm/aesni-gcm-x86_64.pl b/crypto/modes/asm/aesni-gcm-x86_64.pl
index 3781933917..608c3f7805 100644
--- a/crypto/modes/asm/aesni-gcm-x86_64.pl
+++ b/crypto/modes/asm/aesni-gcm-x86_64.pl
@@ -22,7 +22,11 @@
 # [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max
 # Locktyukhin of Intel Corp. who verified that it reduces shuffles
 # pressure with notable relative improvement, achieving 1.0 cycle per
-# byte processed with 128-bit key on Haswell processor.
+# byte processed with 128-bit key on Haswell processor, 0.74 on
+# Broadwell and 0.63 on Skylake. [These results are raw profiled
+# measurements for a favourable packet size, i.e. one divisible by 96.
+# Applications using the EVP interface will observe a few percent
+# worse performance.]
 #
 # [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
 # [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf
@@ -53,6 +57,10 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
 	$avx = ($1>=10) + ($1>=11);
 }
 
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([0-9]+\.[0-9]+)/) {	# probe clang/LLVM; multi-digit majors (10.0+) must match too
+	$avx = ($2>=3.0) + ($2>3.0);	# 0 below 3.0, 1 for exactly 3.0, 2 for anything newer
+}
+
 open OUT,"| \"$^X\" $xlate $flavour $output";
 *STDOUT=*OUT;
 
@@ -88,7 +96,7 @@ _aesni_ctr32_ghash_6x:
 
 .align	32
 .Loop6x:
-	add		\$6<<24,$counter
+	add		\$`6<<24`,$counter
 	jc		.Lhandle_ctr32		# discard $inout[1-5]?
 	vmovdqu		0x00-0x20($Xip),$Hkey	# $Hkey^1
 	  vpaddb	$T2,$inout5,$T1		# next counter value
@@ -516,7 +524,7 @@ _aesni_ctr32_6x:
 	vmovups		0x10-0x80($key),$rndkey
 	lea		0x20-0x80($key),%r12
 	vpxor		$Z0,$T1,$inout0
-	add		\$6<<24,$counter
+	add		\$`6<<24`,$counter
 	jc		.Lhandle_ctr32_2
 	vpaddb		$T2,$T1,$inout1
 	vpaddb		$T2,$inout1,$inout2