stp x25,x26,[sp,#64]
sub sp,sp,#32*12
- ldp $a0,$a1,[$bp]
- ldp $a2,$a3,[$bp,#16]
- ldp $t0,$t1,[$bp,#32]
- ldp $t2,$t3,[$bp,#48]
+ ldp $a0,$a1,[$bp,#64] // in2_z
+ ldp $a2,$a3,[$bp,#64+16]
mov $rp_real,$rp
mov $ap_real,$ap
mov $bp_real,$bp
- orr $a0,$a0,$a1
- orr $a2,$a2,$a3
- ldp $acc0,$acc1,[$ap]
- orr $t0,$t0,$t1
- orr $t2,$t2,$t3
- ldp $acc2,$acc3,[$ap,#16]
- orr $a0,$a0,$a2
- orr $t2,$t0,$t2
- ldp $t0,$t1,[$ap,#32]
- orr $in2infty,$a0,$t2
- cmp $in2infty,#0
- ldp $t2,$t3,[$ap,#48]
- csetm $in2infty,ne // !in2infty
-
- ldp $a0,$a1,[$bp_real,#64] // forward load for p256_sqr_mont
- orr $acc0,$acc0,$acc1
- orr $acc2,$acc2,$acc3
- ldp $a2,$a3,[$bp_real,#64+16]
- orr $t0,$t0,$t1
- orr $t2,$t2,$t3
- orr $acc0,$acc0,$acc2
- orr $t0,$t0,$t2
- orr $in1infty,$acc0,$t0
- cmp $in1infty,#0
ldr $poly1,.Lpoly+8
ldr $poly3,.Lpoly+24
- csetm $in1infty,ne // !in1infty
-
+ orr $t0,$a0,$a1
+ orr $t2,$a2,$a3
+ orr $in2infty,$t0,$t2
+ cmp $in2infty,#0
+ csetm $in2infty,ne // !in2infty
add $rp,sp,#$Z2sqr
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z2sqr, in2_z);
- ldp $a0,$a1,[$ap_real,#64]
+ ldp $a0,$a1,[$ap_real,#64] // in1_z
ldp $a2,$a3,[$ap_real,#64+16]
+ orr $t0,$a0,$a1
+ orr $t2,$a2,$a3
+ orr $in1infty,$t0,$t2
+ cmp $in1infty,#0
+ csetm $in1infty,ne // !in1infty
add $rp,sp,#$Z1sqr
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);
ldr $poly1,.Lpoly+8
ldr $poly3,.Lpoly+24
- ldp $a0,$a1,[$ap]
- ldp $a2,$a3,[$ap,#16]
- ldp $t0,$t1,[$ap,#32]
- ldp $t2,$t3,[$ap,#48]
- orr $a0,$a0,$a1
- orr $a2,$a2,$a3
- orr $t0,$t0,$t1
- orr $t2,$t2,$t3
- orr $a0,$a0,$a2
- orr $t0,$t0,$t2
- orr $in1infty,$a0,$t0
+ ldp $a0,$a1,[$ap,#64] // in1_z
+ ldp $a2,$a3,[$ap,#64+16]
+ orr $t0,$a0,$a1
+ orr $t2,$a2,$a3
+ orr $in1infty,$t0,$t2
cmp $in1infty,#0
csetm $in1infty,ne // !in1infty
- ldp $a0,$a1,[$bp]
- ldp $a2,$a3,[$bp,#16]
- ldp $t0,$t1,[$bp,#32]
+ ldp $acc0,$acc1,[$bp] // in2_x
+ ldp $acc2,$acc3,[$bp,#16]
+ ldp $t0,$t1,[$bp,#32] // in2_y
ldp $t2,$t3,[$bp,#48]
- orr $a0,$a0,$a1
- orr $a2,$a2,$a3
+ orr $acc0,$acc0,$acc1
+ orr $acc2,$acc2,$acc3
orr $t0,$t0,$t1
orr $t2,$t2,$t3
- orr $a0,$a0,$a2
+ orr $acc0,$acc0,$acc2
orr $t0,$t0,$t2
- orr $in2infty,$a0,$t0
+ orr $in2infty,$acc0,$t0
cmp $in2infty,#0
csetm $in2infty,ne // !in2infty
- ldp $a0,$a1,[$ap_real,#64]
- ldp $a2,$a3,[$ap_real,#64+16]
add $rp,sp,#$Z1sqr
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);