X-Git-Url: https://git.openssl.org/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fsha%2Fasm%2Fsha256-586.pl;h=dccc771ad584b8515180d1900bbdd8ab18d57f73;hp=6af1d84beb2bc77aa7d0ea2aae9f10e124aad4aa;hb=fd38836ba8158cb30f0731f8a61780ed4b5a6825;hpb=6aa36e8e5a062e31543e7796f0351ff9628832ce;ds=sidebyside

diff --git a/crypto/sha/asm/sha256-586.pl b/crypto/sha/asm/sha256-586.pl
index 6af1d84beb..dccc771ad5 100644
--- a/crypto/sha/asm/sha256-586.pl
+++ b/crypto/sha/asm/sha256-586.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -18,7 +18,7 @@
 #
 # Performance improvement over compiler generated code varies from
 # 10% to 40% [see below]. Not very impressive on some Âµ-archs, but
-# it's 5 times smaller and optimizies amount of writes.
+# it's 5 times smaller and optimizes amount of writes.
 #
 # May 2012.
 #
@@ -47,7 +47,7 @@
 #
 # Performance in clock cycles per processed byte (less is better):
 #
-#		gcc	icc	x86 asm(*)	SIMD	x86_64 asm(**)	
+#		gcc	icc	x86 asm(*)	SIMD	x86_64 asm(**)
 # Pentium	46	57	40/38		-	-
 # PIII		36	33	27/24		-	-
 # P4		41	38	28		-	17.3
@@ -57,14 +57,17 @@
 # Sandy Bridge	25	-	15.9		12.4	11.6
 # Ivy Bridge	24	-	15.0		11.4	10.3
 # Haswell	22	-	13.9		9.46	7.80
+# Skylake	20	-	14.9		9.50	7.70
 # Bulldozer	36	-	27/22		17.0	13.6
 # VIA Nano	36	-	25/22		16.8	16.5
 # Atom		50	-	30/25		21.9	18.9
 # Silvermont	40	-	34/31		22.9	20.6
+# Goldmont	29	-	20		16.3(***)
 #
 # (*)	numbers after slash are for unrolled loop, where applicable;
 # (**)	x86_64 assembly performance is presented for reference
 #	purposes, results are best-available;
+# (***)	SHAEXT result is 4.1, strangely enough better than 64-bit one;
 
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 push(@INC,"${dir}","${dir}../../perlasm");
@@ -73,7 +76,7 @@ require "x86asm.pl";
 $output=pop;
 open STDOUT,">$output";
 
-&asm_init($ARGV[0],"sha512-586.pl",$ARGV[$#ARGV] eq "386");
+&asm_init($ARGV[0],$ARGV[$#ARGV] eq "386");
 
 $xmm=$avx=0;
 for (@ARGV) { $xmm=1 if (/-DOPENSSL_IA32_SSE2/); }
@@ -93,7 +96,7 @@ if ($xmm && !$avx && $ARGV[0] eq "win32" &&
 	$avx = ($1>=10) + ($1>=11);
 }
 
-if ($xmm && !$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) {
+if ($xmm && !$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([3-9]\.[0-9]+)/) {
 	$avx = ($2>=3.0) + ($2>3.0);
 }
 
@@ -276,7 +279,7 @@ my $suffix=shift;
 	&mov	($Coff,"ecx");
 	&mov	($Doff,"edi");
 	&mov	(&DWP(0,"esp"),"ebx");	# magic
-	&mov	($E,&DWP(16,"esi"));	
+	&mov	($E,&DWP(16,"esi"));
 	&mov	("ebx",&DWP(20,"esi"));
 	&mov	("ecx",&DWP(24,"esi"));
 	&mov	("edi",&DWP(28,"esi"));
@@ -385,7 +388,7 @@ my @AH=($A,$K256);
 	&xor	($AH[1],"ecx");		# magic
 	&mov	(&DWP(8,"esp"),"ecx");
 	&mov	(&DWP(12,"esp"),"ebx");
-	&mov	($E,&DWP(16,"esi"));	
+	&mov	($E,&DWP(16,"esi"));
 	&mov	("ebx",&DWP(20,"esi"));
 	&mov	("ecx",&DWP(24,"esi"));
 	&mov	("esi",&DWP(28,"esi"));