X-Git-Url: https://git.openssl.org/gitweb/?a=blobdiff_plain;f=crypto%2Fsha%2Fasm%2Fsha512-mips.pl;h=89623098d7ac3826528745a24254222edca9eea6;hb=a356e488ad50ca9de3fc4955839441c5aff11ed3;hp=92c36ac16d84310cd232e8e6dc480a89cba7d889;hpb=1a002d88adfff6d4e26789fff6a52c22cd525126;p=openssl.git diff --git a/crypto/sha/asm/sha512-mips.pl b/crypto/sha/asm/sha512-mips.pl index 92c36ac16d..89623098d7 100644 --- a/crypto/sha/asm/sha512-mips.pl +++ b/crypto/sha/asm/sha512-mips.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -17,6 +17,10 @@ # ~17%, but it comes for free, because it's same instruction sequence. # Improvement coefficients are for aligned input. +# September 2012. +# +# Add MIPS[32|64]R2 code (>25% less instructions). + ###################################################################### # There is a number of MIPS ABI in use, O32 and N32/64 are most # widely used. Then there is a new contender: NUBI. It appears that if @@ -83,6 +87,7 @@ if ($output =~ /512/) { $SLL="dsll"; # shift left logical $SRL="dsrl"; # shift right logical $ADDU="daddu"; + $ROTR="drotr"; @Sigma0=(28,34,39); @Sigma1=(14,18,41); @sigma0=( 7, 1, 8); # right shift first @@ -97,6 +102,7 @@ if ($output =~ /512/) { $SLL="sll"; # shift left logical $SRL="srl"; # shift right logical $ADDU="addu"; + $ROTR="rotr"; @Sigma0=( 2,13,22); @Sigma1=( 6,11,25); @sigma0=( 3, 7,18); # right shift first @@ -124,6 +130,10 @@ $code.=<<___ if ($i<15); ${LD}r @X[1],`($i+1)*$SZ+$LSB`($inp) ___ $code.=<<___ if (!$big_endian && $i<16 && $SZ==4); +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + wsbh @X[0],@X[0] # byte swap($i) + rotr @X[0],@X[0],16 +#else srl $tmp0,@X[0],24 # byte swap($i) srl $tmp1,@X[0],8 andi $tmp2,@X[0],0xFF00 @@ -133,8 +143,13 @@ $code.=<<___ if (!$big_endian && $i<16 && $SZ==4); or @X[0],$tmp0 or $tmp1,$tmp2 or @X[0],$tmp1 +#endif ___ $code.=<<___ if (!$big_endian && $i<16 && $SZ==8); +#if defined(_MIPS_ARCH_MIPS64R2) + dsbh @X[0],@X[0] # byte swap($i) + dshd @X[0],@X[0] +#else ori $tmp0,$zero,0xFF dsll $tmp2,$tmp0,32 or $tmp0,$tmp2 # 0x000000FF000000FF @@ -153,8 +168,31 @@ $code.=<<___ if (!$big_endian && $i<16 && $SZ==8); dsrl $tmp1,@X[0],32 dsll @X[0],32 or @X[0],$tmp1 +#endif ___ $code.=<<___; +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + xor $tmp2,$f,$g # $i + $ROTR $tmp0,$e,@Sigma1[0] + $ADDU $T1,$X[0],$h + $ROTR $tmp1,$e,@Sigma1[1] + and $tmp2,$e + $ROTR $h,$e,@Sigma1[2] + xor $tmp0,$tmp1 + $ROTR $tmp1,$a,@Sigma0[0] + xor $tmp2,$g # Ch(e,f,g) + xor $tmp0,$h # Sigma1(e) + + $ROTR $h,$a,@Sigma0[1] + $ADDU $T1,$tmp2 + $LD $tmp2,`$i*$SZ`($Ktbl) # K[$i] + xor $h,$tmp1 + $ROTR $tmp1,$a,@Sigma0[2] + $ADDU $T1,$tmp0 + and $tmp0,$b,$c + xor $h,$tmp1 # Sigma0(a) + xor $tmp1,$b,$c +#else $ADDU $T1,$X[0],$h # $i $SRL $h,$e,@Sigma1[0] xor $tmp2,$f,$g @@ -184,16 +222,15 @@ $code.=<<___; xor $h,$tmp1 $SLL $tmp1,$a,`$SZ*8-@Sigma0[0]` xor $h,$tmp0 - $ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer + and $tmp0,$b,$c xor $h,$tmp1 # Sigma0(a) - - or $tmp0,$a,$b - and $tmp1,$a,$b - and $tmp0,$c - or $tmp1,$tmp0 # Maj(a,b,c) + xor $tmp1,$b,$c +#endif + $ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer + $ADDU $h,$tmp0 + and $tmp1,$a $ADDU $T1,$tmp2 # +=K[$i] - $ADDU $h,$tmp1 - + $ADDU $h,$tmp1 # +=Maj(a,b,c) $ADDU $d,$T1 $ADDU $h,$T1 ___ @@ -207,6 +244,20 @@ my $i=@_[0]; my ($tmp0,$tmp1,$tmp2,$tmp3)=(@X[4],@X[5],@X[6],@X[7]); $code.=<<___; +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + $SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i) + $ROTR $tmp0,@X[1],@sigma0[1] + $ADDU @X[0],@X[9] # +=X[i+9] + xor $tmp2,$tmp0 + $ROTR $tmp0,@X[1],@sigma0[2] + + $SRL $tmp3,@X[14],@sigma1[0] + $ROTR $tmp1,@X[14],@sigma1[1] + xor $tmp2,$tmp0 # sigma0(X[i+1]) + $ROTR $tmp0,@X[14],@sigma1[2] + xor $tmp3,$tmp1 + $ADDU @X[0],$tmp2 +#else $SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i) $ADDU @X[0],@X[9] # +=X[i+9] $SLL $tmp1,@X[1],`$SZ*8-@sigma0[2]` @@ -227,7 +278,7 @@ $code.=<<___; xor $tmp3,$tmp0 $SRL $tmp0,@X[14],@sigma1[2] xor $tmp3,$tmp1 - +#endif xor $tmp3,$tmp0 # sigma1(X[i+14]) $ADDU @X[0],$tmp3 ___ @@ -242,9 +293,13 @@ $code.=<<___; # include #endif +#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2) +#define _MIPS_ARCH_MIPS32R2 +#endif + .text .set noat -#if !defined(__vxworks) || defined(__pic__) +#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__)) .option pic2 #endif @@ -351,7 +406,7 @@ $code.=<<___; $ST $G,6*$SZ($ctx) $ST $H,7*$SZ($ctx) - bnel $inp,@X[15],.Loop + bne $inp,@X[15],.Loop $PTR_SUB $Ktbl,`($rounds-16)*$SZ` # rewind $Ktbl $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)