X-Git-Url: https://git.openssl.org/gitweb/?a=blobdiff_plain;f=crypto%2Fsha%2Fasm%2Fsha1-mips.pl;h=e228f6c588827250480fd18ed8e8e38175fda01b;hb=0d4fb8439092ff8253af72ac6bc193e77ebbcf2f;hp=75a6fb49c6f4eb29c1feedcc6690e666566817be;hpb=0985473636b8bb998eb887c28489cfe5e57905a5;p=openssl.git

diff --git a/crypto/sha/asm/sha1-mips.pl b/crypto/sha/asm/sha1-mips.pl
index 75a6fb49c6..e228f6c588 100644
--- a/crypto/sha/asm/sha1-mips.pl
+++ b/crypto/sha/asm/sha1-mips.pl
@@ -15,6 +15,10 @@
 # compatible subroutine. There is room for minor optimization on
 # little-endian platforms...
 
+# September 2012.
+#
+# Add MIPS32r2 code (>25% fewer instructions).
+
 ######################################################################
 # There is a number of MIPS ABI in use, O32 and N32/64 are most
 # widely used. Then there is a new contender: NUBI. It appears that if
@@ -42,7 +46,7 @@
 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
 #
-$flavour = shift;	# supported flavours are o32,n32,64,nubi32,nubi64
+$flavour = shift || "o32";	# supported flavours are o32,n32,64,nubi32,nubi64
 
 if ($flavour =~ /64|n32/i) {
 	$PTR_ADD="dadd";	# incidentally works even on n32
@@ -59,15 +63,14 @@ if ($flavour =~ /64|n32/i) {
 	$PTR_SLL="sll";
 	$SZREG=4;
 }
-$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff008 : 0x00ff0000;
 #
 # <appro@openssl.org>
 #
 ######################################################################
 
-for (@ARGV) {	$big_endian=1 if (/\-DB_ENDIAN/);
-		$big_endian=0 if (/\-DL_ENDIAN/);
-		$output=$_ if (/^\w[\w\-]*\.\w+$/);	}
+$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});
+
+for (@ARGV) {	$output=$_ if (/^\w[\w\-]*\.\w+$/);	}
 open STDOUT,">$output";
 
 if (!defined($big_endian))
@@ -77,11 +80,11 @@ if (!defined($big_endian))
 $MSB=$big_endian?0:3;
 $LSB=3&~$MSB;
 
-@X=( "\$8", "\$9", "\$10", "\$11", "\$12", "\$13", "\$14", "\$15",
-     "\$16", "\$17", "\$18", "\$19", "\$20", "\$21", "\$22", "\$23");
-$ctx="\$4";	# a0
-$inp="\$5";	# a1
-$num="\$6";	# a2
+@X=map("\$$_",(8..23));	# a4-a7,s0-s11
+
+$ctx=$a0;
+$inp=$a1;
+$num=$a2;
 $A="\$1";
 $B="\$2";
 $C="\$3";
@@ -92,12 +95,14 @@ $t1=$num;	# $num is offloaded to stack
 $t2="\$30";	# fp
 $K="\$31";	# ra
 
-$FRAMESIZE=16;
-
 sub BODY_00_14 {
 my ($i,$a,$b,$c,$d,$e)=@_;
 my $j=$i+1;
 $code.=<<___ if (!$big_endian);
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	wsbh	@X[$i],@X[$i]		# byte swap($i)
+	rotr	@X[$i],@X[$i],16
+#else
 	srl	$t0,@X[$i],24	# byte swap($i)
 	srl	$t1,@X[$i],8
 	andi	$t2,@X[$i],0xFF00
@@ -105,10 +110,24 @@ $code.=<<___ if (!$big_endian);
 	andi	$t1,0xFF00
 	sll	$t2,$t2,8
 	or	@X[$i],$t0
+	or	$t1,$t2
 	or	@X[$i],$t1
-	or	@X[$i],$t2
+#endif
 ___
 $code.=<<___;
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	addu	$e,$K		# $i
+	xor	$t0,$c,$d
+	rotr	$t1,$a,27
+	lwl	@X[$j],$j*4+$MSB($inp)
+	and	$t0,$b
+	addu	$e,$t1
+	lwr	@X[$j],$j*4+$LSB($inp)
+	xor	$t0,$d
+	addu	$e,@X[$i]
+	rotr	$b,$b,2
+	addu	$e,$t0
+#else
 	lwl	@X[$j],$j*4+$MSB($inp)
 	sll	$t0,$a,5	# $i
 	addu	$e,$K
@@ -124,6 +143,7 @@ $code.=<<___;
 	addu	$e,@X[$i]
 	or	$b,$t2
 	addu	$e,$t0
+#endif
 ___
 }
 
@@ -132,6 +152,10 @@ my ($i,$a,$b,$c,$d,$e)=@_;
 my $j=$i+1;
 
 $code.=<<___ if (!$big_endian && $i==15);
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	wsbh	@X[$i],@X[$i]		# byte swap($i)
+	rotr	@X[$i],@X[$i],16
+#else
 	srl	$t0,@X[$i],24	# byte swap($i)
 	srl	$t1,@X[$i],8
 	andi	$t2,@X[$i],0xFF00
@@ -141,8 +165,24 @@ $code.=<<___ if (!$big_endian && $i==15);
 	or	@X[$i],$t0
 	or	@X[$i],$t1
 	or	@X[$i],$t2
+#endif
 ___
 $code.=<<___;
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	addu	$e,$K		# $i
+	xor	@X[$j%16],@X[($j+2)%16]
+	xor	$t0,$c,$d
+	rotr	$t1,$a,27
+	xor	@X[$j%16],@X[($j+8)%16]
+	and	$t0,$b
+	addu	$e,$t1
+	xor	@X[$j%16],@X[($j+13)%16]
+	xor	$t0,$d
+	addu	$e,@X[$i%16]
+	rotr	@X[$j%16],@X[$j%16],31
+	rotr	$b,$b,2
+	addu	$e,$t0
+#else
 	xor	@X[$j%16],@X[($j+2)%16]
 	sll	$t0,$a,5	# $i
 	addu	$e,$K
@@ -162,6 +202,7 @@ $code.=<<___;
 	addu	$e,@X[$i%16]
 	or	$b,$t2
 	addu	$e,$t0
+#endif
 ___
 }
 
@@ -169,6 +210,20 @@ sub BODY_20_39 {
 my ($i,$a,$b,$c,$d,$e)=@_;
 my $j=$i+1;
 $code.=<<___ if ($i<79);
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	xor	@X[$j%16],@X[($j+2)%16]
+	addu	$e,$K		# $i
+	rotr	$t1,$a,27
+	xor	@X[$j%16],@X[($j+8)%16]
+	xor	$t0,$c,$d
+	addu	$e,$t1
+	xor	@X[$j%16],@X[($j+13)%16]
+	xor	$t0,$b
+	addu	$e,@X[$i%16]
+	rotr	@X[$j%16],@X[$j%16],31
+	rotr	$b,$b,2
+	addu	$e,$t0
+#else
 	xor	@X[$j%16],@X[($j+2)%16]
 	sll	$t0,$a,5	# $i
 	addu	$e,$K
@@ -187,8 +242,24 @@ $code.=<<___ if ($i<79);
 	or	@X[$j%16],$t1
 	or	$b,$t2
 	addu	$e,$t0
+#endif
 ___
 $code.=<<___ if ($i==79);
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	lw	@X[0],0($ctx)
+	addu	$e,$K		# $i
+	lw	@X[1],4($ctx)
+	rotr	$t1,$a,27
+	lw	@X[2],8($ctx)
+	xor	$t0,$c,$d
+	addu	$e,$t1
+	lw	@X[3],12($ctx)
+	xor	$t0,$b
+	addu	$e,@X[$i%16]
+	lw	@X[4],16($ctx)
+	rotr	$b,$b,2
+	addu	$e,$t0
+#else
 	lw	@X[0],0($ctx)
 	sll	$t0,$a,5	# $i
 	addu	$e,$K
@@ -206,6 +277,7 @@ $code.=<<___ if ($i==79);
 	addu	$e,@X[$i%16]
 	or	$b,$t2
 	addu	$e,$t0
+#endif
 ___
 }
 
@@ -213,6 +285,22 @@ sub BODY_40_59 {
 my ($i,$a,$b,$c,$d,$e)=@_;
 my $j=$i+1;
 $code.=<<___ if ($i<79);
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	addu	$e,$K		# $i
+	and	$t0,$c,$d
+	xor	@X[$j%16],@X[($j+2)%16]
+	rotr	$t1,$a,27
+	addu	$e,$t0
+	xor	@X[$j%16],@X[($j+8)%16]
+	xor	$t0,$c,$d
+	addu	$e,$t1
+	xor	@X[$j%16],@X[($j+13)%16]
+	and	$t0,$b
+	addu	$e,@X[$i%16]
+	rotr	@X[$j%16],@X[$j%16],31
+	rotr	$b,$b,2
+	addu	$e,$t0
+#else
 	xor	@X[$j%16],@X[($j+2)%16]
 	sll	$t0,$a,5	# $i
 	addu	$e,$K
@@ -233,10 +321,22 @@ $code.=<<___ if ($i<79);
 	addu	$e,@X[$i%16]
 	or	$b,$t2
 	addu	$e,$t0
+#endif
 ___
 }
 
+$FRAMESIZE=16;	# large enough to accommodate NUBI saved registers
+$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
+
 $code=<<___;
+#ifdef OPENSSL_FIPSCANISTER
+# include <openssl/fipssyms.h>
+#endif
+
+#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
+#define _MIPS_ARCH_MIPS32R2
+#endif
+
 .text
 
 .set	noat
@@ -246,7 +346,7 @@ $code=<<___;
 .ent	sha1_block_data_order
 sha1_block_data_order:
 	.frame	$sp,$FRAMESIZE*$SZREG,$ra
-	.mask	0xd0000000|$SAVED_REGS_MASK,-$SZREG
+	.mask	$SAVED_REGS_MASK,-$SZREG
 	.set	noreorder
 	$PTR_SUB $sp,$FRAMESIZE*$SZREG
 	$REG_S	$ra,($FRAMESIZE-1)*$SZREG($sp)
@@ -260,7 +360,7 @@ sha1_block_data_order:
 	$REG_S	$s5,($FRAMESIZE-9)*$SZREG($sp)
 	$REG_S	$s4,($FRAMESIZE-10)*$SZREG($sp)
 ___
-$code.=<<___ if ($flavour =~ /nubi/i);
+$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
 	$REG_S	$s3,($FRAMESIZE-11)*$SZREG($sp)
 	$REG_S	$s2,($FRAMESIZE-12)*$SZREG($sp)
 	$REG_S	$s1,($FRAMESIZE-13)*$SZREG($sp)
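
For reference, the heart of the MIPS32r2 path added above is the two-instruction byte swap: "wsbh" (swap bytes within each halfword) followed by "rotr reg,reg,16" replaces the eight-instruction srl/andi/sll/or sequence used on pre-R2 cores. The following is a minimal C sketch, not part of sha1-mips.pl and with invented function names, illustrating why the two sequences compute the same 32-bit byte reversal of a little-endian message word:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Pre-R2 path: the shift/mask/or byte swap the generic code emits
 * (srl/andi/sll/or), written as C. */
static uint32_t bswap_generic(uint32_t x)
{
    return (x >> 24) | ((x >> 8) & 0x0000ff00u) |
           ((x << 8) & 0x00ff0000u) | (x << 24);
}

/* R2 path: wsbh swaps the bytes within each 16-bit half; rotating by 16
 * then exchanges the two halves, completing the full byte reversal. */
static uint32_t bswap_wsbh_rotr(uint32_t x)
{
    uint32_t h = ((x & 0x00ff00ffu) << 8) | ((x >> 8) & 0x00ff00ffu); /* wsbh   */
    return (h >> 16) | (h << 16);                                     /* rotr 16 */
}

int main(void)
{
    uint32_t v = 0x01020304u;
    assert(bswap_generic(v) == 0x04030201u);
    assert(bswap_generic(v) == bswap_wsbh_rotr(v));
    printf("%08x -> %08x\n", (unsigned)v, (unsigned)bswap_wsbh_rotr(v));
    return 0;
}

The rest of the R2 blocks make the same kind of substitution inside the round bodies: "rotr" stands in for the sll/srl/or rotate idiom, which is where the instruction-count reduction noted in the September 2012 comment comes from.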