Minor sparcv9 clean-ups.

[openssl.git] / crypto / bn / asm / sparcv9-mont.pl
diff --git a/crypto/bn/asm/sparcv9-mont.pl b/crypto/bn/asm/sparcv9-mont.pl

index 0339bfe7f3a82a7c298c26de495bd0e923994e14..acdf6928ca52c1c65b93861364aa90faafcb4e70 100644 (file)
--- a/crypto/bn/asm/sparcv9-mont.pl
+++ b/crypto/bn/asm/sparcv9-mont.pl
@@ -72,7 +72,7 @@ $apj="%l5";
  $npj="%l6";
  $tpj="%l7";
  
-$fname="bn_mul_mont";
+$fname="bn_mul_mont_int";
  
  $code=<<___;
  .section       ".text",#alloc,#execinstr
@@ -94,17 +94,17 @@ $fname:
         cmp     $ap,$bp
         and     $num,$mask,$num
         ld      [$bp],$mul0             ! bp[0]
-       be,pt   `$bits==32?"%icc":"%xcc"`,.Lbn_sqr_mont
         nop
  
         add     %sp,$bias,%o7           ! real top of stack
-       ld      [$ap],$car0             ! ap[0]
+       ld      [$ap],$car0             ! ap[0] ! redundant in squaring context
         sub     %o7,$num,%o7
         ld      [$ap+4],$apj            ! ap[1]
         and     %o7,-1024,%o7
         ld      [$np],$car1             ! np[0]
         sub     %o7,$bias,%sp           ! alloca
         ld      [$np+4],$npj            ! np[1]
+       be,pt   `$bits==32?"%icc":"%xcc"`,.Lbn_sqr_mont
         mov     12,$j
  
         mulx    $car0,$mul0,$car0       ! ap[0]*bp[0]
@@ -298,22 +298,14 @@ $fname:
  ___
  \f
  ########
-######## bn_sqr_mont gives up to 20% improvement over above code
+######## .Lbn_sqr_mont gives up to 20% *overall* improvement over
+######## code without following dedicated squaring procedure.
  ########
  $sbit="%i2";           # re-use $bp!
  
  $code.=<<___;
  .align 32
  .Lbn_sqr_mont:
-       add     %sp,$bias,%o7                   ! real top of stack
-       ld      [$ap+4],$apj                    ! ap[1]
-       sub     %o7,$num,%o7
-       ld      [$np],$car1                     ! np[0]
-       and     %o7,-1024,%o7
-       ld      [$np+4],$npj                    ! np[1]
-       sub     %o7,$bias,%sp                   ! alloca
-       mov     12,$j
-
         mulx    $mul0,$mul0,$car0               ! ap[0]*ap[0]
         mulx    $apj,$mul0,$tmp0                !prologue!
         and     $car0,$mask,$acc0