mul $nlo,$nj,$m1 // np[j]*m1
adds $lo1,$lo1,$lo0
umulh $nhi,$nj,$m1
- str $lo1,[$tp,#-16] // tp[j-1]
+ stur $lo1,[$tp,#-16] // tp[j-1]
cbnz $j,.Linner
.Linner_skip:
csel $nj,$tj,$aj,lo // did it borrow?
ldr $tj,[$tp],#8
ldr $aj,[$rp],#8
- str xzr,[$tp,#-16] // wipe tp
- str $nj,[$rp,#-16]
+ stur xzr,[$tp,#-16] // wipe tp
+ stur $nj,[$rp,#-16]
cbnz $num,.Lcond_copy
csel $nj,$tj,$aj,lo
- str xzr,[$tp,#-8] // wipe tp
- str $nj,[$rp,#-8]
+ stur xzr,[$tp,#-8] // wipe tp
+ stur $nj,[$rp,#-8]
ldp x19,x20,[x29,#16]
mov sp,x29
cmp $ap,$bp
b.ne __bn_mul4x_mont
.Lsqr8x_mont:
+ .inst 0xd503233f // paciasp
stp x29,x30,[sp,#-128]!
add x29,sp,#0
stp x19,x20,[sp,#16]
ldp $a4,$a5,[$tp,#8*4]
ldp $a6,$a7,[$tp,#8*6]
adds $acc0,$acc0,$a0
- ldr $n0,[$rp,#-8*8]
+ ldur $n0,[$rp,#-8*8]
adcs $acc1,$acc1,$a1
ldp $a0,$a1,[$ap,#8*0]
adcs $acc2,$acc2,$a2
//adc $carry,xzr,xzr // moved below
cbz $cnt,.Lsqr8x8_post_condition
- ldr $n0,[$tp,#-8*8]
+ ldur $n0,[$tp,#-8*8]
ldp $a0,$a1,[$np,#8*0]
ldp $a2,$a3,[$np,#8*2]
ldp $a4,$a5,[$np,#8*4]
ldp $a6,$a7,[$tp,#8*6]
cbz $cnt,.Lsqr8x_tail_break
- ldr $n0,[$rp,#-8*8]
+ ldur $n0,[$rp,#-8*8]
adds $acc0,$acc0,$a0
adcs $acc1,$acc1,$a1
ldp $a0,$a1,[$np,#8*0]
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldr x29,[sp],#128
+ .inst 0xd50323bf // autiasp
ret
.size __bn_sqr8x_mont,.-__bn_sqr8x_mont
___
// __bn_mul4x_mont: only the entry and exit sequences of this routine are
// visible in this excerpt; the computation between them is not shown.
// Reached from the `b.ne __bn_mul4x_mont` dispatch when $ap != $bp.
// Frame: 128 bytes; x29/x30 at [frame,#0], x19/x20 at [frame,#16],
// x25/x26 at [frame,#64], x27/x28 at [frame,#80].
// NOTE(review): the other register saves/restores (and the reload of x30,
// if any) are presumably in the elided part -- confirm against the full file.
.type __bn_mul4x_mont,%function
.align 5
__bn_mul4x_mont:
// Raw encoding of paciasp (HINT space): signs x30 using the entry sp as
// modifier. Presumably emitted via .inst so assemblers without
// pointer-authentication mnemonics still accept it -- TODO confirm.
+ .inst 0xd503233f // paciasp
stp x29,x30,[sp,#-128]! // pre-index: sp -= 128, then store FP/LR at the new sp
add x29,sp,#0 // x29 = sp, i.e. frame pointer = base of the 128-byte frame
stp x19,x20,[sp,#16] // save x19/x20 at frame offset 16
// (body elided in this excerpt)
ldp x25,x26,[x29,#64] // restore x25/x26 from frame offset 64
ldp x27,x28,[x29,#80] // restore x27/x28 from frame offset 80
ldr x29,[sp],#128 // post-index: reload caller's x29 from [sp], then sp += 128
// Raw encoding of autiasp: authenticates x30 with sp as modifier, so it
// has to come after the post-index load above has released the frame.
// NOTE(review): assumes sp equals the frame base at the ldr, so that sp is
// back at its entry value here -- confirm against the elided code.
+ .inst 0xd50323bf // autiasp
ret
.size __bn_mul4x_mont,.-__bn_mul4x_mont
___