$m1="s5";
$code=<<___;
+#ifdef __linux__
+#include <asm/regdef.h>
+#else
#include <asm.h>
#include <regdef.h>
+#endif
.text
.align 5
.ent bn_mul_mont
bn_mul_mont:
- lda sp,-40(sp)
+ lda sp,-48(sp) # 48-byte frame (was 40), presumably to keep sp 16-byte aligned, TODO confirm
stq ra,0(sp)
stq s3,8(sp)
stq s4,16(sp)
stq s5,24(sp)
stq fp,32(sp)
mov sp,fp
- .mask 0x0400f000,-40
- .frame fp,40,ra
+ .mask 0x0400f000,-48
+ .frame fp,48,ra
.prologue 0
.align 4
ldq $aj,8($ap)
subq sp,AT,sp
ldq $bi,0($bp) # bp[0]
- mov -4096,AT
+ lda AT,-4096(zero) # mov -4096,AT
ldq $n0,0($n0)
and sp,AT,sp
.align 4
.L1st:
.set noreorder
- ldq $aj,($aj)
+ ldq $aj,0($aj) # explicit 0 displacement replaces the bare (reg) form, presumably for assembler portability, TODO confirm
addl $j,1,$j
- ldq $nj,($nj)
+ ldq $nj,0($nj)
lda $tp,8($tp)
addq $alo,$hi0,$lo0
.align 4
.Louter:
s8addq $i,$bp,$bi
- ldq $hi0,($ap)
+ ldq $hi0,0($ap) # ap[0]
ldq $aj,8($ap)
- ldq $bi,($bi)
- ldq $hi1,($np)
+ ldq $bi,0($bi) # bp[i], address formed by the s8addq above
+ ldq $hi1,0($np) # np[0]
ldq $nj,8($np)
- ldq $tj,(sp)
+ ldq $tj,0(sp) # tp[0], tp lives at sp
mulq $hi0,$bi,$lo0
umulh $hi0,$bi,$hi0
.set noreorder
ldq $tj,8($tp) #L0
nop #U1
- ldq $aj,($aj) #L1
+ ldq $aj,0($aj) #L1
s8addq $j,$np,$nj #U0
- ldq $nj,($nj) #L0
+ ldq $nj,0($nj) #L0
nop #U1
addq $alo,$hi0,$lo0 #L1
lda $tp,8($tp)
addq $hi1,v0,$hi1
addq $hi1,$hi0,$lo1
- stq $j,($tp)
+ stq $j,0($tp)
cmpult $lo1,$hi0,$hi1
addq $lo1,$tj,$lo1
cmpult $lo1,$tj,AT
stq $hi1,16($tp)
bne $tj,.Louter
\f
- s8addq $num,sp,$ap
- mov $rp,$bp
+ s8addq $num,sp,$tj # &tp[num]
+ mov $rp,$bp # put rp aside
mov sp,$tp
- mov 0,$hi0
-
- bne $hi1,.Lsub
- cmpult $nj,$lo1,AT
- bne AT,.Lsub
+ mov sp,$ap
+ mov 0,$hi0 # clear borrow bit
.align 4
-.Lcopy: ldq AT,($tp)
+.Lsub: ldq $lo0,0($tp) # load tp[i]
+ ldq $lo1,0($np) # load np[i]
lda $tp,8($tp)
- stq AT,($rp)
- cmpult $tp,$ap,AT
- stq zero,-8($tp)
- nop
- lda $rp,8($rp)
- bne AT,.Lcopy
- mov 1,v0
- br .Lexit
-
-.align 4
-.Lsub: ldq $lo0,($tp)
- ldq $lo1,($np)
- subq $lo0,$lo1,$lo1
+ lda $np,8($np) # step np to the next 64-bit word
+ subq $lo0,$lo1,$lo1 # tp[i]-np[i]
cmpult $lo0,$lo1,AT
subq $lo1,$hi0,$lo0
cmpult $lo1,$lo0,$hi0
- lda $tp,8($tp)
or $hi0,AT,$hi0
- lda $np,8($np)
- stq $lo0,($rp)
- cmpult $tp,$ap,v0
+ stq $lo0,0($rp) # rp[i] = tp[i]-np[i]-borrow
+ cmpult $tp,$tj,v0 # v0 = tp cursor still below &tp[num]
lda $rp,8($rp)
bne v0,.Lsub
- subq $hi1,$hi0,$hi0
+ subq $hi1,$hi0,$hi0 # handle upmost overflow bit
mov sp,$tp
- cmpule $hi1,$hi0,AT
- mov $bp,$rp
- bne AT,.Lcopy
+ mov $bp,$rp # restore rp
+
+ and sp,$hi0,$ap # tp base ANDed with the borrow mask
+ bic $bp,$hi0,$bp # saved rp with the mask bits cleared
+ bis $bp,$ap,$ap # ap=borrow?tp:rp
.align 4
-.Lzap: stq zero,($tp)
- cmpult $tp,$ap,AT
+.Lcopy: ldq $aj,0($ap) # copy or in-place refresh
lda $tp,8($tp)
- bne AT,.Lzap
+ lda $rp,8($rp) # advance destination
+ lda $ap,8($ap) # advance source (tp or rp per the select above)
+ stq zero,-8($tp) # zap tp
+ cmpult $tp,$tj,AT # AT = more words left before &tp[num]
+ stq $aj,-8($rp) # write the word to the slot rp addressed before the advance
+ bne AT,.Lcopy
mov 1,v0
-.align 4
.Lexit:
.set noreorder
mov fp,sp
ldq s4,16(sp)
ldq s5,24(sp)
ldq fp,32(sp)
- lda sp,40(sp)
+ lda sp,48(sp) # pop the 48-byte frame allocated in the prologue
ret (ra)
.end bn_mul_mont
-.rdata
-.asciiz "Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
+.ascii "Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2 # pad to alignment after the string bytes
___
print $code;