projects
/
openssl.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
42b9a41
)
aesni-sha1-x86_64.pl: update performance data.
author
Andy Polyakov
<appro@openssl.org>
Mon, 10 Jun 2013 20:35:22 +0000
(22:35 +0200)
committer
Andy Polyakov
<appro@openssl.org>
Mon, 10 Jun 2013 20:35:22 +0000
(22:35 +0200)
crypto/aes/asm/aesni-sha1-x86_64.pl
patch
|
blob
|
history
diff --git
a/crypto/aes/asm/aesni-sha1-x86_64.pl
b/crypto/aes/asm/aesni-sha1-x86_64.pl
index 031e19be653dff6b0433c00668eeaba3b594de3f..2d1a222ac246221fcfc6b65b08c242cec34b5e0b 100644
(file)
--- a/
crypto/aes/asm/aesni-sha1-x86_64.pl
+++ b/
crypto/aes/asm/aesni-sha1-x86_64.pl
@@
-22,20
+22,23
@@
#
# AES-128-CBC +SHA1 stitch gain
# Westmere 3.77[+5.5] 9.26 6.58 +41%
#
# AES-128-CBC +SHA1 stitch gain
# Westmere 3.77[+5.5] 9.26 6.58 +41%
-# Sandy Bridge 5.05[+5.0(6.2)] 10.06(11.21)
6.09(7.05) +65
%(+59%)
+# Sandy Bridge 5.05[+5.0(6.2)] 10.06(11.21)
5.98(7.05) +68
%(+59%)
# Ivy Bridge 5.05[+4.6] 9.65 5.54 +74%
# Ivy Bridge 5.05[+4.6] 9.65 5.54 +74%
+# Haswell 4.43[+3.6(4.4)] 8.00(8.80) 4.55(5.21) +75%(+69%)
# Bulldozer 5.77[+6.0] 11.72 6.37 +84%
#
# AES-192-CBC
# Westmere 4.51 10.00 6.87 +46%
# Sandy Bridge 6.05 11.06(12.21) 6.11(7.20) +81%(+70%)
# Ivy Bridge 6.05 10.65 6.07 +75%
# Bulldozer 5.77[+6.0] 11.72 6.37 +84%
#
# AES-192-CBC
# Westmere 4.51 10.00 6.87 +46%
# Sandy Bridge 6.05 11.06(12.21) 6.11(7.20) +81%(+70%)
# Ivy Bridge 6.05 10.65 6.07 +75%
+# Haswell 5.29 8.86(9.65) 5.32(5.32) +67%(+81%)
# Bulldozer 6.89 12.84 6.96 +84%
#
# AES-256-CBC
# Westmere 5.25 10.74 7.19 +49%
# Sandy Bridge 7.05 12.06(13.21) 7.12(7.68) +69%(+72%)
# Ivy Bridge 7.05 11.65 7.12 +64%
# Bulldozer 6.89 12.84 6.96 +84%
#
# AES-256-CBC
# Westmere 5.25 10.74 7.19 +49%
# Sandy Bridge 7.05 12.06(13.21) 7.12(7.68) +69%(+72%)
# Ivy Bridge 7.05 11.65 7.12 +64%
+# Haswell 6.19 9.76(10.6) 6.21(6.41) +57%(+65%)
# Bulldozer 8.00 13.95 8.25 +69%
#
# (*) There are two code paths: SSSE3 and AVX. See sha1-568.pl for
# Bulldozer 8.00 13.95 8.25 +69%
#
# (*) There are two code paths: SSSE3 and AVX. See sha1-568.pl for
@@
-51,9
+54,10
@@
# standalone AESNI-CBC decrypt:
#
# AES-128-CBC AES-192-CBC AES-256-CBC
# standalone AESNI-CBC decrypt:
#
# AES-128-CBC AES-192-CBC AES-256-CBC
-# Westmere 1.
31 1.55 1.80
+# Westmere 1.
25 1.50 1.75
# Sandy Bridge 0.74 0.91 1.09
# Ivy Bridge 0.74 0.90 1.11
# Sandy Bridge 0.74 0.91 1.09
# Ivy Bridge 0.74 0.90 1.11
+# Haswell 0.63 0.76 0.88
# Bulldozer 0.70 0.85 0.99
$flavour = shift;
# Bulldozer 0.70 0.85 0.99
$flavour = shift;
@@
-94,7
+98,7
@@
$code.=<<___;
.globl aesni_cbc_sha1_enc
.type aesni_cbc_sha1_enc,\@abi-omnipotent
.globl aesni_cbc_sha1_enc
.type aesni_cbc_sha1_enc,\@abi-omnipotent
-.align
16
+.align
32
aesni_cbc_sha1_enc:
# caller should check for SSSE3 and AES-NI bits
mov OPENSSL_ia32cap_P+0(%rip),%r10d
aesni_cbc_sha1_enc:
# caller should check for SSSE3 and AES-NI bits
mov OPENSSL_ia32cap_P+0(%rip),%r10d
@@
-144,7
+148,7
@@
my $_ror=sub { &ror(@_) };
$code.=<<___;
.type aesni_cbc_sha1_enc_ssse3,\@function,6
$code.=<<___;
.type aesni_cbc_sha1_enc_ssse3,\@function,6
-.align
16
+.align
32
aesni_cbc_sha1_enc_ssse3:
mov `($win64?56:8)`(%rsp),$inp # load 7th argument
#shr \$6,$len # debugging artefact
aesni_cbc_sha1_enc_ssse3:
mov `($win64?56:8)`(%rsp),$inp # load 7th argument
#shr \$6,$len # debugging artefact
@@
-570,7
+574,7
@@
sub body_40_59 () { # ((b^c)&(c^d))^c
return @r;
}
$code.=<<___;
return @r;
}
$code.=<<___;
-.align
16
+.align
32
.Loop_ssse3:
___
&Xupdate_ssse3_16_31(\&body_00_19);
.Loop_ssse3:
___
&Xupdate_ssse3_16_31(\&body_00_19);
@@
-618,7
+622,6
@@
$code.=<<___;
and @T[1],@T[0]
jmp .Loop_ssse3
and @T[1],@T[0]
jmp .Loop_ssse3
-.align 16
.Ldone_ssse3:
___
$jj=$j=$saved_j; @V=@saved_V;
.Ldone_ssse3:
___
$jj=$j=$saved_j; @V=@saved_V;
@@
-689,7
+692,7
@@
my $_ror=sub { &shrd(@_[0],@_) };
$code.=<<___;
.type aesni_cbc_sha1_enc_avx,\@function,6
$code.=<<___;
.type aesni_cbc_sha1_enc_avx,\@function,6
-.align
16
+.align
32
aesni_cbc_sha1_enc_avx:
mov `($win64?56:8)`(%rsp),$inp # load 7th argument
#shr \$6,$len # debugging artefact
aesni_cbc_sha1_enc_avx:
mov `($win64?56:8)`(%rsp),$inp # load 7th argument
#shr \$6,$len # debugging artefact
@@
-773,14
+776,14
@@
my $aesenc=sub {
my ($n,$k)=($r/10,$r%10);
if ($k==0) {
$code.=<<___;
my ($n,$k)=($r/10,$r%10);
if ($k==0) {
$code.=<<___;
- vmov
ups
`16*$n`($in0),$in # load input
- v
xorps
$rndkey[1],$in,$in
+ vmov
dqu
`16*$n`($in0),$in # load input
+ v
pxor
$rndkey[1],$in,$in
___
$code.=<<___ if ($n);
vmovups $iv,`16*($n-1)`($out,$in0) # write output
___
$code.=<<___;
___
$code.=<<___ if ($n);
vmovups $iv,`16*($n-1)`($out,$in0) # write output
___
$code.=<<___;
- v
xorps
$in,$iv,$iv
+ v
pxor
$in,$iv,$iv
vaesenc $rndkey[0],$iv,$iv
vmovups `32+16*$k-112`($key),$rndkey[1]
___
vaesenc $rndkey[0],$iv,$iv
vmovups `32+16*$k-112`($key),$rndkey[1]
___
@@
-1014,7
+1017,7
@@
sub Xtail_avx()
}
$code.=<<___;
}
$code.=<<___;
-.align
16
+.align
32
.Loop_avx:
___
&Xupdate_avx_16_31(\&body_00_19);
.Loop_avx:
___
&Xupdate_avx_16_31(\&body_00_19);
@@
-1062,7
+1065,6
@@
$code.=<<___;
and @T[1],@T[0]
jmp .Loop_avx
and @T[1],@T[0]
jmp .Loop_avx
-.align 16
.Ldone_avx:
___
$jj=$j=$saved_j; @V=@saved_V;
.Ldone_avx:
___
$jj=$j=$saved_j; @V=@saved_V;