From: Andy Polyakov Date: Mon, 27 Sep 2004 09:37:03 +0000 (+0000) Subject: SHA1 asm Pentium tune-up. Performance loss is not as bad anymore. X-Git-Tag: BEN_FIPS_TEST_6~14^2~68 X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=commitdiff_plain;h=c29ef588dc34417562dc9338cc3d222f9aea392b SHA1 asm Pentium tune-up. Performance loss is not as bad anymore. --- diff --git a/crypto/sha/asm/sha1-586.pl b/crypto/sha/asm/sha1-586.pl index e00f709553..9fe1a7554d 100644 --- a/crypto/sha/asm/sha1-586.pl +++ b/crypto/sha/asm/sha1-586.pl @@ -9,7 +9,7 @@ # # compared with original compared with Intel cc # assembler impl. generated code -# Pentium -25% +37% +# Pentium -16% +48% # PIII/AMD +8% +16% # P4 +85%(!) +45% # @@ -104,19 +104,21 @@ sub BODY_00_15 &comment("00_15 $n"); - &mov($tmp1,$a); - &mov($f,$c); # f to hold F_00_19(b,c,d) + &mov($f,$c); # f to hold F_00_19(b,c,d) + if ($n==0) { &mov($tmp1,$a); } + else { &mov($a,$tmp1); } &rotl($tmp1,5); # tmp1=ROTATE(a,5) &xor($f,$d); &and($f,$b); - &rotr($b,2); # b=ROTATE(b,30) - &add($tmp1,$e); # tmp1+=e; - &mov($e,&swtmp($n)); # e becomes volatile and + &add($tmp1,$e); # tmp1+=e; + &mov($e,&swtmp($n)); # e becomes volatile and # is loaded with xi - &xor($f,$d); # f holds F_00_19(b,c,d) + &xor($f,$d); # f holds F_00_19(b,c,d) + &rotr($b,2); # b=ROTATE(b,30) &lea($tmp1,&DWP($K,$tmp1,$e,1));# tmp1+=K_00_19+xi - - &add($f,$tmp1); # f+=tmp1 + + if ($n==15) { &add($f,$tmp1); } # f+=tmp1 + else { &add($tmp1,$f); } } sub BODY_16_19 @@ -132,15 +134,15 @@ sub BODY_16_19 &xor($tmp1,$d); &xor($f,&swtmp($n2)); &and($tmp1,$b); # tmp1 holds F_00_19(b,c,d) - &xor($f,&swtmp($n3)); # f holds xa^xb^xc^xd - &rotr($b,2); # b=ROTATE(b,30) - &xor($tmp1,$d); # tmp1=F_00_19(b,c,d) - &rotl($f,1); # f=ROATE(f,1) + &rotr($b,2); # b=ROTATE(b,30) + &xor($f,&swtmp($n3)); # f holds xa^xb^xc^xd + &rotl($f,1); # f=ROATE(f,1) + &xor($tmp1,$d); # tmp1=F_00_19(b,c,d) &mov(&swtmp($n0),$f); # xi=f &lea($f,&DWP($K,$f,$e,1)); # f+=K_00_19+e &mov($e,$a); # e becomes 
volatile - &add($f,$tmp1); # f+=F_00_19(b,c,d) - &rotl($e,5); # e=ROTATE(a,5) + &rotl($e,5); # e=ROTATE(a,5) + &add($f,$tmp1); # f+=F_00_19(b,c,d) &add($f,$e); # f+=ROTATE(a,5) } @@ -151,20 +153,20 @@ sub BODY_20_39 &comment("20_39 $n"); local($n0,$n1,$n2,$n3,$np)=&Na($n); - &mov($f,&swtmp($n0)); # f to hold Xupdate(xi,xa,xb,xc,xd) - &mov($tmp1,$b); # tmp1 to hold F_20_39(b,c,d) - &xor($f,&swtmp($n1)); - &rotr($b,2); # b=ROTATE(b,30) - &xor($f,&swtmp($n2)); - &xor($tmp1,$c); - &xor($f,&swtmp($n3)); # f holds xa^xb^xc^xd - &xor($tmp1,$d); # tmp1 holds F_20_39(b,c,d) + &mov($tmp1,$b); # tmp1 to hold F_20_39(b,c,d) + &mov($f,&swtmp($n0)); # f to hold Xupdate(xi,xa,xb,xc,xd) + &rotr($b,2); # b=ROTATE(b,30) + &xor($f,&swtmp($n1)); + &xor($tmp1,$c); + &xor($f,&swtmp($n2)); + &xor($tmp1,$d); # tmp1 holds F_20_39(b,c,d) + &xor($f,&swtmp($n3)); # f holds xa^xb^xc^xd &rotl($f,1); # f=ROTATE(f,1) + &add($tmp1,$e); &mov(&swtmp($n0),$f); # xi=f - &lea($f,&DWP($K,$f,$e,1)); # f+=K_20_39+e &mov($e,$a); # e becomes volatile &rotl($e,5); # e=ROTATE(a,5) - &add($f,$tmp1); # f+=F_20_39(b,c,d) + &lea($f,&DWP($K,$f,$tmp1,1)); # f+=K_20_39+F_20_39(b,c,d)+e &add($f,$e); # f+=ROTATE(a,5) } @@ -176,14 +178,17 @@ sub BODY_40_59 local($n0,$n1,$n2,$n3,$np)=&Na($n); &mov($f,&swtmp($n0)); # f to hold Xupdate(xi,xa,xb,xc,xd) + &mov($tmp1,&swtmp($n1)); + &xor($f,$tmp1); + &mov($tmp1,&swtmp($n2)); + &xor($f,$tmp1); + &mov($tmp1,&swtmp($n3)); + &xor($f,$tmp1); # f holds xa^xb^xc^xd &mov($tmp1,$b); # tmp1 to hold F_40_59(b,c,d) - &xor($f,&swtmp($n1)); - &or($tmp1,$c); - &xor($f,&swtmp($n2)); - &and($tmp1,$d); - &xor($f,&swtmp($n3)); # f holds xa^xb^xc^xd &rotl($f,1); # f=ROTATE(f,1) + &or($tmp1,$c); &mov(&swtmp($n0),$f); # xi=f + &and($tmp1,$d); &lea($f,&DWP($K,$f,$e,1)); # f+=K_40_59+e &mov($e,$b); # e becomes volatile and is used # to calculate F_40_59(b,c,d) @@ -192,8 +197,8 @@ sub BODY_40_59 &or($tmp1,$e); # tmp1 holds F_40_59(b,c,d) &mov($e,$a); &rotl($e,5); # e=ROTATE(a,5) - &add($tmp1,$e); # 
tmp1+=ROTATE(a,5) - &add($f,$tmp1); # f+=tmp1; + &add($f,$tmp1); # f+=tmp1; + &add($f,$e); # f+=ROTATE(a,5) } sub BODY_60_79