projects
/
openssl.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Update copyright year
[openssl.git]
/
crypto
/
sha
/
asm
/
sha256-586.pl
diff --git a/crypto/sha/asm/sha256-586.pl b/crypto/sha/asm/sha256-586.pl
index 6af1d84beb2bc77aa7d0ea2aae9f10e124aad4aa..dccc771ad584b8515180d1900bbdd8ab18d57f73 100644
(file)
--- a/crypto/sha/asm/sha256-586.pl
+++ b/crypto/sha/asm/sha256-586.pl
@@ -1,5 +1,5 @@
#! /usr/bin/env perl
#! /usr/bin/env perl
-# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@@ -18,7 +18,7 @@
#
# Performance improvement over compiler generated code varies from
# 10% to 40% [see below]. Not very impressive on some µ-archs, but
#
# Performance improvement over compiler generated code varies from
# 10% to 40% [see below]. Not very impressive on some µ-archs, but
-# it's 5 times smaller and optimizies amount of writes.
+# it's 5 times smaller and optimizes amount of writes.
#
# May 2012.
#
#
# May 2012.
#
@@ -47,7 +47,7 @@
#
# Performance in clock cycles per processed byte (less is better):
#
#
# Performance in clock cycles per processed byte (less is better):
#
-# gcc icc x86 asm(*) SIMD x86_64 asm(**)
+# gcc icc x86 asm(*) SIMD x86_64 asm(**)
# Pentium 46 57 40/38 - -
# PIII 36 33 27/24 - -
# P4 41 38 28 - 17.3
# Pentium 46 57 40/38 - -
# PIII 36 33 27/24 - -
# P4 41 38 28 - 17.3
@@ -57,14 +57,17 @@
# Sandy Bridge 25 - 15.9 12.4 11.6
# Ivy Bridge 24 - 15.0 11.4 10.3
# Haswell 22 - 13.9 9.46 7.80
# Sandy Bridge 25 - 15.9 12.4 11.6
# Ivy Bridge 24 - 15.0 11.4 10.3
# Haswell 22 - 13.9 9.46 7.80
+# Skylake 20 - 14.9 9.50 7.70
# Bulldozer 36 - 27/22 17.0 13.6
# VIA Nano 36 - 25/22 16.8 16.5
# Atom 50 - 30/25 21.9 18.9
# Silvermont 40 - 34/31 22.9 20.6
# Bulldozer 36 - 27/22 17.0 13.6
# VIA Nano 36 - 25/22 16.8 16.5
# Atom 50 - 30/25 21.9 18.9
# Silvermont 40 - 34/31 22.9 20.6
+# Goldmont 29 - 20 16.3(***)
#
# (*) numbers after slash are for unrolled loop, where applicable;
# (**) x86_64 assembly performance is presented for reference
# purposes, results are best-available;
#
# (*) numbers after slash are for unrolled loop, where applicable;
# (**) x86_64 assembly performance is presented for reference
# purposes, results are best-available;
+# (***) SHAEXT result is 4.1, strangely enough better than 64-bit one;
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
@@ -73,7 +76,7 @@
require "x86asm.pl";
$output=pop;
open STDOUT,">$output";
$output=pop;
open STDOUT,">$output";
-&asm_init($ARGV[0],"sha512-586.pl",$ARGV[$#ARGV] eq "386");
+&asm_init($ARGV[0],$ARGV[$#ARGV] eq "386");
$xmm=$avx=0;
for (@ARGV) { $xmm=1 if (/-DOPENSSL_IA32_SSE2/); }
$xmm=$avx=0;
for (@ARGV) { $xmm=1 if (/-DOPENSSL_IA32_SSE2/); }
@@ -93,7 +96,7 @@
if ($xmm && !$avx && $ARGV[0] eq "win32" &&
$avx = ($1>=10) + ($1>=11);
}
$avx = ($1>=10) + ($1>=11);
}
-if ($xmm && !$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) {
+if ($xmm && !$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([3-9]\.[0-9]+)/) {
$avx = ($2>=3.0) + ($2>3.0);
}
$avx = ($2>=3.0) + ($2>3.0);
}
@@ -276,7 +279,7 @@
my $suffix=shift;
&mov ($Coff,"ecx");
&mov ($Doff,"edi");
&mov (&DWP(0,"esp"),"ebx"); # magic
&mov ($Coff,"ecx");
&mov ($Doff,"edi");
&mov (&DWP(0,"esp"),"ebx"); # magic
- &mov ($E,&DWP(16,"esi"));
+ &mov ($E,&DWP(16,"esi"));
&mov ("ebx",&DWP(20,"esi"));
&mov ("ecx",&DWP(24,"esi"));
&mov ("edi",&DWP(28,"esi"));
&mov ("ebx",&DWP(20,"esi"));
&mov ("ecx",&DWP(24,"esi"));
&mov ("edi",&DWP(28,"esi"));
@@ -385,7 +388,7 @@
my @AH=($A,$K256);
&xor ($AH[1],"ecx"); # magic
&mov (&DWP(8,"esp"),"ecx");
&mov (&DWP(12,"esp"),"ebx");
&xor ($AH[1],"ecx"); # magic
&mov (&DWP(8,"esp"),"ecx");
&mov (&DWP(12,"esp"),"ebx");
- &mov ($E,&DWP(16,"esi"));
+ &mov ($E,&DWP(16,"esi"));
&mov ("ebx",&DWP(20,"esi"));
&mov ("ecx",&DWP(24,"esi"));
&mov ("esi",&DWP(28,"esi"));
&mov ("ebx",&DWP(20,"esi"));
&mov ("ecx",&DWP(24,"esi"));
&mov ("esi",&DWP(28,"esi"));