X-Git-Url: https://git.openssl.org/gitweb/?a=blobdiff_plain;f=crypto%2Fsha%2Fasm%2Fsha1-mips.pl;h=e228f6c588827250480fd18ed8e8e38175fda01b;hb=0d4fb8439092ff8253af72ac6bc193e77ebbcf2f;hp=75a6fb49c6f4eb29c1feedcc6690e666566817be;hpb=0985473636b8bb998eb887c28489cfe5e57905a5;p=openssl.git

diff --git a/crypto/sha/asm/sha1-mips.pl b/crypto/sha/asm/sha1-mips.pl
index 75a6fb49c6..e228f6c588 100644
--- a/crypto/sha/asm/sha1-mips.pl
+++ b/crypto/sha/asm/sha1-mips.pl
@@ -15,6 +15,10 @@
 # compatible subroutine. There is room for minor optimization on
 # little-endian platforms...
 
+# September 2012.
+#
+# Add MIPS32r2 code (>25% fewer instructions).
+
 ######################################################################
 # There is a number of MIPS ABI in use, O32 and N32/64 are most
 # widely used. Then there is a new contender: NUBI. It appears that if
@@ -42,7 +46,7 @@
 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
 #
-$flavour = shift;	# supported flavours are o32,n32,64,nubi32,nubi64
+$flavour = shift || "o32";	# supported flavours are o32,n32,64,nubi32,nubi64
 
 if ($flavour =~ /64|n32/i) {
 	$PTR_ADD="dadd";	# incidentally works even on n32
@@ -59,15 +63,14 @@ if ($flavour =~ /64|n32/i) {
 	$PTR_SLL="sll";
 	$SZREG=4;
 }
-$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff008 : 0x00ff0000;
 #
 # <appro@openssl.org>
 #
 ######################################################################
 
-for (@ARGV) {	$big_endian=1 if (/\-DB_ENDIAN/);
-		$big_endian=0 if (/\-DL_ENDIAN/);
-		$output=$_ if (/^\w[\w\-]*\.\w+$/);	}
+$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});
+
+for (@ARGV) {	$output=$_ if (/^\w[\w\-]*\.\w+$/);	}
 open STDOUT,">$output";
 
 if (!defined($big_endian))
@@ -77,11 +80,11 @@ if (!defined($big_endian))
 $MSB=$big_endian?0:3;
 $LSB=3&~$MSB;
 
-@X=( "\$8", "\$9", "\$10", "\$11", "\$12", "\$13", "\$14", "\$15",
-     "\$16", "\$17", "\$18", "\$19", "\$20", "\$21", "\$22", "\$23");
-$ctx="\$4";	# a0
-$inp="\$5";	# a1
-$num="\$6";	# a2
+@X=map("\$$_",(8..23));	# a4-a7,s0-s11
+
+$ctx=$a0;
+$inp=$a1;
+$num=$a2;
 $A="\$1";
 $B="\$2";
 $C="\$3";
@@ -92,12 +95,14 @@ $t1=$num;	# $num is offloaded to stack
 $t2="\$30";	# fp
 $K="\$31";	# ra
 
-$FRAMESIZE=16;
-
 sub BODY_00_14 {
 my ($i,$a,$b,$c,$d,$e)=@_;
 my $j=$i+1;
 $code.=<<___ if (!$big_endian);
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	wsbh	@X[$i],@X[$i]		# byte swap($i)
+	rotr	@X[$i],@X[$i],16
+#else
 	srl	$t0,@X[$i],24	# byte swap($i)
 	srl	$t1,@X[$i],8
 	andi	$t2,@X[$i],0xFF00
@@ -105,10 +110,24 @@ $code.=<<___ if (!$big_endian);
 	andi	$t1,0xFF00
 	sll	$t2,$t2,8
 	or	@X[$i],$t0
+	or	$t1,$t2
 	or	@X[$i],$t1
-	or	@X[$i],$t2
+#endif
 ___
 $code.=<<___;
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	addu	$e,$K		# $i
+	xor	$t0,$c,$d
+	rotr	$t1,$a,27
+	lwl	@X[$j],$j*4+$MSB($inp)
+	and	$t0,$b
+	addu	$e,$t1
+	lwr	@X[$j],$j*4+$LSB($inp)
+	xor	$t0,$d
+	addu	$e,@X[$i]
+	rotr	$b,$b,2
+	addu	$e,$t0
+#else
 	lwl	@X[$j],$j*4+$MSB($inp)
 	sll	$t0,$a,5	# $i
 	addu	$e,$K
@@ -124,6 +143,7 @@ $code.=<<___;
 	addu	$e,@X[$i]
 	or	$b,$t2
 	addu	$e,$t0
+#endif
 ___
 }
 
@@ -132,6 +152,10 @@ my ($i,$a,$b,$c,$d,$e)=@_;
 my $j=$i+1;
 
 $code.=<<___ if (!$big_endian && $i==15);
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	wsbh	@X[$i],@X[$i]		# byte swap($i)
+	rotr	@X[$i],@X[$i],16
+#else
 	srl	$t0,@X[$i],24	# byte swap($i)
 	srl	$t1,@X[$i],8
 	andi	$t2,@X[$i],0xFF00
@@ -141,8 +165,24 @@ $code.=<<___ if (!$big_endian && $i==15);
 	or	@X[$i],$t0
 	or	@X[$i],$t1
 	or	@X[$i],$t2
+#endif
 ___
 $code.=<<___;
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	addu	$e,$K		# $i
+	xor	@X[$j%16],@X[($j+2)%16]
+	xor	$t0,$c,$d
+	rotr	$t1,$a,27
+	xor	@X[$j%16],@X[($j+8)%16]
+	and	$t0,$b
+	addu	$e,$t1
+	xor	@X[$j%16],@X[($j+13)%16]
+	xor	$t0,$d
+	addu	$e,@X[$i%16]
+	rotr	@X[$j%16],@X[$j%16],31
+	rotr	$b,$b,2
+	addu	$e,$t0
+#else
 	xor	@X[$j%16],@X[($j+2)%16]
 	sll	$t0,$a,5	# $i
 	addu	$e,$K
@@ -162,6 +202,7 @@ $code.=<<___;
 	addu	$e,@X[$i%16]
 	or	$b,$t2
 	addu	$e,$t0
+#endif
 ___
 }
 
@@ -169,6 +210,20 @@ sub BODY_20_39 {
 my ($i,$a,$b,$c,$d,$e)=@_;
 my $j=$i+1;
 $code.=<<___ if ($i<79);
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	xor	@X[$j%16],@X[($j+2)%16]
+	addu	$e,$K		# $i
+	rotr	$t1,$a,27
+	xor	@X[$j%16],@X[($j+8)%16]
+	xor	$t0,$c,$d
+	addu	$e,$t1
+	xor	@X[$j%16],@X[($j+13)%16]
+	xor	$t0,$b
+	addu	$e,@X[$i%16]
+	rotr	@X[$j%16],@X[$j%16],31
+	rotr	$b,$b,2
+	addu	$e,$t0
+#else
 	xor	@X[$j%16],@X[($j+2)%16]
 	sll	$t0,$a,5	# $i
 	addu	$e,$K
@@ -187,8 +242,24 @@ $code.=<<___ if ($i<79);
 	or	@X[$j%16],$t1
 	or	$b,$t2
 	addu	$e,$t0
+#endif
 ___
 $code.=<<___ if ($i==79);
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	lw	@X[0],0($ctx)
+	addu	$e,$K		# $i
+	lw	@X[1],4($ctx)
+	rotr	$t1,$a,27
+	lw	@X[2],8($ctx)
+	xor	$t0,$c,$d
+	addu	$e,$t1
+	lw	@X[3],12($ctx)
+	xor	$t0,$b
+	addu	$e,@X[$i%16]
+	lw	@X[4],16($ctx)
+	rotr	$b,$b,2
+	addu	$e,$t0
+#else
 	lw	@X[0],0($ctx)
 	sll	$t0,$a,5	# $i
 	addu	$e,$K
@@ -206,6 +277,7 @@ $code.=<<___ if ($i==79);
 	addu	$e,@X[$i%16]
 	or	$b,$t2
 	addu	$e,$t0
+#endif
 ___
 }
 
@@ -213,6 +285,22 @@ sub BODY_40_59 {
 my ($i,$a,$b,$c,$d,$e)=@_;
 my $j=$i+1;
 $code.=<<___ if ($i<79);
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+	addu	$e,$K		# $i
+	and	$t0,$c,$d
+	xor	@X[$j%16],@X[($j+2)%16]
+	rotr	$t1,$a,27
+	addu	$e,$t0
+	xor	@X[$j%16],@X[($j+8)%16]
+	xor	$t0,$c,$d
+	addu	$e,$t1
+	xor	@X[$j%16],@X[($j+13)%16]
+	and	$t0,$b
+	addu	$e,@X[$i%16]
+	rotr	@X[$j%16],@X[$j%16],31
+	rotr	$b,$b,2
+	addu	$e,$t0
+#else
 	xor	@X[$j%16],@X[($j+2)%16]
 	sll	$t0,$a,5	# $i
 	addu	$e,$K
@@ -233,10 +321,22 @@ $code.=<<___ if ($i<79);
 	addu	$e,@X[$i%16]
 	or	$b,$t2
 	addu	$e,$t0
+#endif
 ___
 }
 
+$FRAMESIZE=16;	# large enough to accommodate NUBI saved registers
+$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
+
 $code=<<___;
+#ifdef OPENSSL_FIPSCANISTER
+# include <openssl/fipssyms.h>
+#endif
+
+#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
+#define _MIPS_ARCH_MIPS32R2
+#endif
+
 .text
 
 .set	noat
@@ -246,7 +346,7 @@ $code=<<___;
 .ent	sha1_block_data_order
 sha1_block_data_order:
 	.frame	$sp,$FRAMESIZE*$SZREG,$ra
-	.mask	0xd0000000|$SAVED_REGS_MASK,-$SZREG
+	.mask	$SAVED_REGS_MASK,-$SZREG
 	.set	noreorder
 	$PTR_SUB $sp,$FRAMESIZE*$SZREG
 	$REG_S	$ra,($FRAMESIZE-1)*$SZREG($sp)
@@ -260,7 +360,7 @@ sha1_block_data_order:
 	$REG_S	$s5,($FRAMESIZE-9)*$SZREG($sp)
 	$REG_S	$s4,($FRAMESIZE-10)*$SZREG($sp)
 ___
-$code.=<<___ if ($flavour =~ /nubi/i);
+$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
 	$REG_S	$s3,($FRAMESIZE-11)*$SZREG($sp)
 	$REG_S	$s2,($FRAMESIZE-12)*$SZREG($sp)
 	$REG_S	$s1,($FRAMESIZE-13)*$SZREG($sp)
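
For reference, the heart of the MIPS32r2 path added above is the two-instruction byte swap: "wsbh" (swap bytes within each halfword) followed by "rotr reg,reg,16" replaces the eight-instruction srl/andi/sll/or sequence used on pre-R2 cores. The following is a minimal C sketch, not part of sha1-mips.pl and with invented function names, illustrating why the two sequences compute the same 32-bit byte reversal of a little-endian message word:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Pre-R2 path: the shift/mask/or byte swap the generic code emits
 * (srl/andi/sll/or), written as C. */
static uint32_t bswap_generic(uint32_t x)
{
    return (x >> 24) | ((x >> 8) & 0x0000ff00u) |
           ((x << 8) & 0x00ff0000u) | (x << 24);
}

/* R2 path: wsbh swaps the bytes within each 16-bit half; rotating by 16
 * then exchanges the two halves, completing the full byte reversal. */
static uint32_t bswap_wsbh_rotr(uint32_t x)
{
    uint32_t h = ((x & 0x00ff00ffu) << 8) | ((x >> 8) & 0x00ff00ffu); /* wsbh   */
    return (h >> 16) | (h << 16);                                     /* rotr 16 */
}

int main(void)
{
    uint32_t v = 0x01020304u;
    assert(bswap_generic(v) == 0x04030201u);
    assert(bswap_generic(v) == bswap_wsbh_rotr(v));
    printf("%08x -> %08x\n", (unsigned)v, (unsigned)bswap_wsbh_rotr(v));
    return 0;
}

The rest of the R2 blocks make the same kind of substitution inside the round bodies: "rotr" stands in for the sll/srl/or rotate idiom, which is where the instruction-count reduction noted in the September 2012 comment comes from.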