projects
/
openssl.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
+20% tune-up for Power5.
[openssl.git]
/
crypto
/
bn
/
asm
/
ppc-mont.pl
diff --git
a/crypto/bn/asm/ppc-mont.pl
b/crypto/bn/asm/ppc-mont.pl
index e9149c98cf5347b7d13f00b0085a6ebf6337b82f..280d31244b72de6da467c35af9679172e40d2238 100644
(file)
--- a/
crypto/bn/asm/ppc-mont.pl
+++ b/
crypto/bn/asm/ppc-mont.pl
@@
-98,7
+98,7
@@
$nlo="r25";
$nhi="r0";
$code=<<___;
$nhi="r0";
$code=<<___;
-.machine
any
+.machine
"any"
.text
.globl .bn_mul_mont
.text
.globl .bn_mul_mont
@@
-162,17
+162,16
@@
$code=<<___;
.align 4
L1st:
$LDX $aj,$ap,$j ; ap[j]
.align 4
L1st:
$LDX $aj,$ap,$j ; ap[j]
- $LDX $nj,$np,$j ; np[j]
addc $lo0,$alo,$hi0
addc $lo0,$alo,$hi0
+ $LDX $nj,$np,$j ; np[j]
addze $hi0,$ahi
$UMULL $alo,$aj,$m0 ; ap[j]*bp[0]
addze $hi0,$ahi
$UMULL $alo,$aj,$m0 ; ap[j]*bp[0]
- $UMULH $ahi,$aj,$m0
-
addc $lo1,$nlo,$hi1
addc $lo1,$nlo,$hi1
+ $UMULH $ahi,$aj,$m0
addze $hi1,$nhi
$UMULL $nlo,$nj,$m1 ; np[j]*m1
addze $hi1,$nhi
$UMULL $nlo,$nj,$m1 ; np[j]*m1
- $UMULH $nhi,$nj,$m1
addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0]
addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0]
+ $UMULH $nhi,$nj,$m1
addze $hi1,$hi1
$ST $lo1,0($tp) ; tp[j-1]
addze $hi1,$hi1
$ST $lo1,0($tp) ; tp[j-1]
@@
-206,20
+205,16
@@
Louter:
$LD $aj,$BNSZ($ap) ; ap[1]
$LD $nj,0($np) ; np[0]
addc $lo0,$lo0,$tj ; ap[0]*bp[i]+tp[0]
$LD $aj,$BNSZ($ap) ; ap[1]
$LD $nj,0($np) ; np[0]
addc $lo0,$lo0,$tj ; ap[0]*bp[i]+tp[0]
+ $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
addze $hi0,$hi0
addze $hi0,$hi0
-
$UMULL $m1,$lo0,$n0 ; tp[0]*n0
$UMULL $m1,$lo0,$n0 ; tp[0]*n0
-
- $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
$UMULH $ahi,$aj,$m0
$UMULH $ahi,$aj,$m0
-
$UMULL $lo1,$nj,$m1 ; np[0]*m1
$UMULH $hi1,$nj,$m1
$LD $nj,$BNSZ($np) ; np[1]
addc $lo1,$lo1,$lo0
$UMULL $lo1,$nj,$m1 ; np[0]*m1
$UMULH $hi1,$nj,$m1
$LD $nj,$BNSZ($np) ; np[1]
addc $lo1,$lo1,$lo0
- addze $hi1,$hi1
-
$UMULL $nlo,$nj,$m1 ; np[1]*m1
$UMULL $nlo,$nj,$m1 ; np[1]*m1
+ addze $hi1,$hi1
$UMULH $nhi,$nj,$m1
\f
mtctr $num
$UMULH $nhi,$nj,$m1
\f
mtctr $num
@@
-227,24
+222,22
@@
Louter:
.align 4
Linner:
$LDX $aj,$ap,$j ; ap[j]
.align 4
Linner:
$LDX $aj,$ap,$j ; ap[j]
- $LD $tj,$BNSZ($tp) ; tp[j]
addc $lo0,$alo,$hi0
addc $lo0,$alo,$hi0
+ $LD $tj,$BNSZ($tp) ; tp[j]
addze $hi0,$ahi
$LDX $nj,$np,$j ; np[j]
addze $hi0,$ahi
$LDX $nj,$np,$j ; np[j]
- addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j]
- addze $hi0,$hi0
- $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
- $UMULH $ahi,$aj,$m0
-
addc $lo1,$nlo,$hi1
addc $lo1,$nlo,$hi1
+ $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
addze $hi1,$nhi
addze $hi1,$nhi
+ $UMULH $ahi,$aj,$m0
+ addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j]
$UMULL $nlo,$nj,$m1 ; np[j]*m1
$UMULL $nlo,$nj,$m1 ; np[j]*m1
+ addze $hi0,$hi0
$UMULH $nhi,$nj,$m1
addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j]
$UMULH $nhi,$nj,$m1
addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j]
+ addi $j,$j,$BNSZ ; j++
addze $hi1,$hi1
$ST $lo1,0($tp) ; tp[j-1]
addze $hi1,$hi1
$ST $lo1,0($tp) ; tp[j-1]
-
- addi $j,$j,$BNSZ ; j++
addi $tp,$tp,$BNSZ ; tp++
bdnz- Linner
;Linner
addi $tp,$tp,$BNSZ ; tp++
bdnz- Linner
;Linner