projects
/
openssl.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (from parent 1:
d2fd65f
)
sha256-586.pl: minor optimization, +0-2% on all CPUs, +7% on Westmere.
author
Andy Polyakov
<appro@openssl.org>
Sat, 17 Sep 2011 12:57:33 +0000
(12:57 +0000)
committer
Andy Polyakov
<appro@openssl.org>
Sat, 17 Sep 2011 12:57:33 +0000
(12:57 +0000)
crypto/sha/asm/sha256-586.pl
patch
|
blob
|
history
diff --git
a/crypto/sha/asm/sha256-586.pl
b/crypto/sha/asm/sha256-586.pl
index ecc8b69c75dd19599200d552836e929e24353833..928ec53123bfd1d9827c8a8545174abd60ed1bf7 100644
(file)
--- a/
crypto/sha/asm/sha256-586.pl
+++ b/
crypto/sha/asm/sha256-586.pl
@@
-14,8
+14,8
@@
# Pentium PIII P4 AMD K8 Core2
# gcc 46 36 41 27 26
# icc 57 33 38 25 23
# Pentium PIII P4 AMD K8 Core2
# gcc 46 36 41 27 26
# icc 57 33 38 25 23
-# x86 asm 40 30 35 20 20
-# x86_64 asm(*) - - 21 15.8 16.5
+# x86 asm 40 30 33 20 18
+# x86_64 asm(*) - - 21 16 16
#
# (*) x86_64 assembler performance is presented for reference
# purposes.
#
# (*) x86_64 assembler performance is presented for reference
# purposes.
@@
-48,20
+48,19
@@
sub BODY_00_15() {
my $in_16_63=shift;
&mov ("ecx",$E);
my $in_16_63=shift;
&mov ("ecx",$E);
- &add ($T,&DWP(4*(8+15+16-9),"esp")) if ($in_16_63); # T += X[-7]
- &ror ("ecx",6);
- &mov ("edi",$E);
- &ror ("edi",11);
+ &add ($T,"edi") if ($in_16_63); # T += sigma1(X[-2])
+ &ror ("ecx",25-11);
&mov ("esi",$Foff);
&mov ("esi",$Foff);
- &xor ("ecx","edi");
- &ror ("edi",25-11);
+ &xor ("ecx",$E);
+ &ror ("ecx",11-6);
&mov (&DWP(4*(8+15),"esp"),$T) if ($in_16_63); # save X[0]
&mov (&DWP(4*(8+15),"esp"),$T) if ($in_16_63); # save X[0]
- &xor ("ecx","edi"); # Sigma1(e)
+ &xor ("ecx",$E);
+ &ror ("ecx",6); # Sigma1(e)
&mov ("edi",$Goff);
&add ($T,"ecx"); # T += Sigma1(e)
&mov ("edi",$Goff);
&add ($T,"ecx"); # T += Sigma1(e)
- &mov ($Eoff,$E); # modulo-scheduled
&xor ("esi","edi");
&xor ("esi","edi");
+ &mov ($Eoff,$E); # modulo-scheduled
&mov ("ecx",$A);
&and ("esi",$E);
&mov ($E,$Doff); # e becomes d, which is e in next iteration
&mov ("ecx",$A);
&and ("esi",$E);
&mov ($E,$Doff); # e becomes d, which is e in next iteration
@@
-69,14
+68,14
@@
sub BODY_00_15() {
&mov ("edi",$A);
&add ($T,"esi"); # T += Ch(e,f,g)
&mov ("edi",$A);
&add ($T,"esi"); # T += Ch(e,f,g)
- &ror ("ecx",2);
+ &ror ("ecx",22-13);
&add ($T,$Hoff); # T += h
&add ($T,$Hoff); # T += h
- &ror ("edi",13);
+ &xor ("ecx",$A);
+ &ror ("ecx",13-2);
&mov ("esi",$Boff);
&mov ("esi",$Boff);
- &xor ("ecx","edi");
- &ror ("edi",22-13);
+ &xor ("ecx",$A);
+ &ror ("ecx",2); # Sigma0(a)
&add ($E,$T); # d += T
&add ($E,$T); # d += T
- &xor ("ecx","edi"); # Sigma0(a)
&mov ("edi",$Coff);
&add ($T,"ecx"); # T += Sigma0(a)
&mov ("edi",$Coff);
&add ($T,"ecx"); # T += Sigma0(a)
@@
-168,23
+167,22
@@
sub BODY_00_15() {
&set_label("16_63",16);
&mov ("esi",$T);
&mov ("ecx",&DWP(4*(8+15+16-14),"esp"));
&set_label("16_63",16);
&mov ("esi",$T);
&mov ("ecx",&DWP(4*(8+15+16-14),"esp"));
- &shr ($T,3);
- &ror ("esi",7);
- &xor ($T,"esi");
&ror ("esi",18-7);
&mov ("edi","ecx");
&ror ("esi",18-7);
&mov ("edi","ecx");
- &xor ($T,"esi"); # T = sigma0(X[-15])
+ &xor ("esi",$T);
+ &ror ("esi",7);
+ &shr ($T,3);
- &shr ("ecx",10);
- &mov ("esi",&DWP(4*(8+15+16),"esp"));
- &ror ("edi",17);
- &xor ("ecx","edi");
&ror ("edi",19-17);
&ror ("edi",19-17);
- &add ($T,"esi"); # T += X[-16]
- &xor ("edi","ecx") # sigma1(X[-2])
+ &xor ($T,"esi"); # T = sigma0(X[-15])
+ &xor ("edi","ecx");
+ &ror ("edi",17);
+ &shr ("ecx",10);
+ &add ($T,&DWP(4*(8+15+16),"esp")); # T += X[-16]
+ &xor ("edi","ecx"); # sigma1(X[-2])
- &add ($T,"edi"); # T += sigma1(X[-2])
- # &add ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7], moved to BODY_00_15(1)
+ &add ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7]
+ # &add ($T,"edi"); # T += sigma1(X[-2])
# &mov (&DWP(4*(8+15),"esp"),$T); # save X[0]
&BODY_00_15(1);
# &mov (&DWP(4*(8+15),"esp"),$T); # save X[0]
&BODY_00_15(1);