X-Git-Url: https://git.openssl.org/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fsha%2Fasm%2Fsha256-586.pl;h=dccc771ad584b8515180d1900bbdd8ab18d57f73;hp=6af1d84beb2bc77aa7d0ea2aae9f10e124aad4aa;hb=fd38836ba8158cb30f0731f8a61780ed4b5a6825;hpb=6aa36e8e5a062e31543e7796f0351ff9628832ce;ds=sidebyside diff --git a/crypto/sha/asm/sha256-586.pl b/crypto/sha/asm/sha256-586.pl index 6af1d84beb..dccc771ad5 100644 --- a/crypto/sha/asm/sha256-586.pl +++ b/crypto/sha/asm/sha256-586.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -18,7 +18,7 @@ # # Performance improvement over compiler generated code varies from # 10% to 40% [see below]. Not very impressive on some µ-archs, but -# it's 5 times smaller and optimizies amount of writes. +# it's 5 times smaller and optimizes amount of writes. # # May 2012. # @@ -47,7 +47,7 @@ # # Performance in clock cycles per processed byte (less is better): # -# gcc icc x86 asm(*) SIMD x86_64 asm(**) +# gcc icc x86 asm(*) SIMD x86_64 asm(**) # Pentium 46 57 40/38 - - # PIII 36 33 27/24 - - # P4 41 38 28 - 17.3 @@ -57,14 +57,17 @@ # Sandy Bridge 25 - 15.9 12.4 11.6 # Ivy Bridge 24 - 15.0 11.4 10.3 # Haswell 22 - 13.9 9.46 7.80 +# Skylake 20 - 14.9 9.50 7.70 # Bulldozer 36 - 27/22 17.0 13.6 # VIA Nano 36 - 25/22 16.8 16.5 # Atom 50 - 30/25 21.9 18.9 # Silvermont 40 - 34/31 22.9 20.6 +# Goldmont 29 - 20 16.3(***) # # (*) numbers after slash are for unrolled loop, where applicable; # (**) x86_64 assembly performance is presented for reference # purposes, results are best-available; +# (***) SHAEXT result is 4.1, strangely enough better than 64-bit one; $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); @@ -73,7 +76,7 @@ require "x86asm.pl"; $output=pop; open STDOUT,">$output"; -&asm_init($ARGV[0],"sha512-586.pl",$ARGV[$#ARGV] eq "386"); +&asm_init($ARGV[0],$ARGV[$#ARGV] eq "386"); $xmm=$avx=0; for (@ARGV) { $xmm=1 if (/-DOPENSSL_IA32_SSE2/); } @@ -93,7 +96,7 @@ if ($xmm && !$avx && $ARGV[0] eq "win32" && $avx = ($1>=10) + ($1>=11); } -if ($xmm && !$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) { +if ($xmm && !$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([3-9]\.[0-9]+)/) { $avx = ($2>=3.0) + ($2>3.0); } @@ -276,7 +279,7 @@ my $suffix=shift; &mov ($Coff,"ecx"); &mov ($Doff,"edi"); &mov (&DWP(0,"esp"),"ebx"); # magic - &mov ($E,&DWP(16,"esi")); + &mov ($E,&DWP(16,"esi")); &mov ("ebx",&DWP(20,"esi")); &mov ("ecx",&DWP(24,"esi")); &mov ("edi",&DWP(28,"esi")); @@ -385,7 +388,7 @@ my @AH=($A,$K256); &xor ($AH[1],"ecx"); # magic &mov (&DWP(8,"esp"),"ecx"); &mov (&DWP(12,"esp"),"ebx"); - &mov ($E,&DWP(16,"esi")); + &mov ($E,&DWP(16,"esi")); &mov ("ebx",&DWP(20,"esi")); &mov ("ecx",&DWP(24,"esi")); &mov ("esi",&DWP(28,"esi"));