push %r13
mov 8*0($a_ptr), $a0
+ xor $t4,$t4
mov 8*1($a_ptr), $a1
add $a0, $a0 # a0:a3+a0:a3
mov 8*2($a_ptr), $a2
adc $a2, $a2
adc $a3, $a3
mov $a1, $t1
- sbb $t4, $t4
+ adc \$0, $t4
sub 8*0($a_ptr), $a0
mov $a2, $t2
sbb 8*2($a_ptr), $a2
mov $a3, $t3
sbb 8*3($a_ptr), $a3
- test $t4, $t4
+ sbb \$0, $t4
- cmovz $t0, $a0
- cmovz $t1, $a1
+ cmovb $t0, $a0
+ cmovb $t1, $a1
mov $a0, 8*0($r_ptr)
- cmovz $t2, $a2
+ cmovb $t2, $a2
mov $a1, 8*1($r_ptr)
- cmovz $t3, $a3
+ cmovb $t3, $a3
mov $a2, 8*2($r_ptr)
mov $a3, 8*3($r_ptr)
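As far as I can tell, the substance of this hunk (and of the ones that follow) is the reduction strategy. The old sequence captured only the carry out of the 256-bit doubling (sbb $t4,$t4), subtracted the modulus, and then selected with test/cmovz, so it reduced only when the doubling overflowed 2^256; a result landing in [p, 2^256) was left unreduced. For example, doubling a = (p+1)/2 yields p+1 with no carry out, which the old selection keeps as-is instead of reducing to 1. The new sequence treats the carry as a fifth limb (xor $t4,$t4 ... adc \$0,$t4), folds the borrow of the subtraction into it (sbb \$0,$t4), and selects with cmovb, so the subtracted value is kept exactly when the full five-limb value is at least p. Below is a minimal C model of the corrected pattern; it is illustrative only, the helper name is mine, and it assumes a compiler with unsigned __int128 (GCC/Clang).

#include <stdint.h>

typedef unsigned __int128 u128;

/* The P-256 prime p, least-significant limb first. */
static const uint64_t P256[4] = {
    0xffffffffffffffffULL, 0x00000000ffffffffULL,
    0x0000000000000000ULL, 0xffffffff00000001ULL
};

/* Illustrative model of the hunk above: r = 2*a mod p, assuming a < p. */
static void p256_double(uint64_t r[4], const uint64_t a[4])
{
    uint64_t dbl[4], sub[4], carry = 0, borrow = 0;
    int i;

    /* xor $t4,$t4; add/adc ...; adc $0,$t4: double, keeping bit 256 in carry */
    for (i = 0; i < 4; i++) {
        u128 s = (u128)a[i] + a[i] + carry;
        dbl[i] = (uint64_t)s;
        carry  = (uint64_t)(s >> 64);
    }

    /* sub/sbb chain against p; sbb $0,$t4: subtract across all five limbs */
    for (i = 0; i < 4; i++) {
        u128 d = (u128)P256[i] + borrow;
        borrow = dbl[i] < d;
        sub[i] = dbl[i] - (uint64_t)d;
    }
    borrow = carry < borrow;            /* final CF after "sbb $0, $t4" */

    /* cmovb: if the five-limb subtraction borrowed, 2*a < p, keep 2*a */
    for (i = 0; i < 4; i++)
        r[i] = borrow ? dbl[i] : sub[i];
}

Since a < p on entry, 2*a < 2*p, so a single conditional subtraction is enough to bring the result back below p.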
sbb \$0, $a2
mov $a3, $t3
sbb .Lpoly+8*3(%rip), $a3
- test $t4, $t4
+ sbb \$0, $t4
- cmovz $t0, $a0
- cmovz $t1, $a1
- cmovz $t2, $a2
- cmovz $t3, $a3
+ cmovb $t0, $a0
+ cmovb $t1, $a1
+ cmovb $t2, $a2
+ cmovb $t3, $a3
+ xor $t4, $t4
add 8*0($a_ptr), $a0 # a0:a3+=a_ptr[0:3]
sbb \$0, $a2
mov $a3, $t3
sbb .Lpoly+8*3(%rip), $a3
- test $t4, $t4
+ sbb \$0, $t4
- cmovz $t0, $a0
- cmovz $t1, $a1
+ cmovb $t0, $a0
+ cmovb $t1, $a1
mov $a0, 8*0($r_ptr)
- cmovz $t2, $a2
+ cmovb $t2, $a2
mov $a1, 8*1($r_ptr)
- cmovz $t3, $a3
+ cmovb $t3, $a3
mov $a2, 8*2($r_ptr)
mov $a3, 8*3($r_ptr)
sbb 8*2($a_ptr), $a2
mov $a3, $t3
sbb 8*3($a_ptr), $a3
- test $t4, $t4
+ sbb \$0, $t4
- cmovz $t0, $a0
- cmovz $t1, $a1
+ cmovb $t0, $a0
+ cmovb $t1, $a1
mov $a0, 8*0($r_ptr)
- cmovz $t2, $a2
+ cmovb $t2, $a2
mov $a1, 8*1($r_ptr)
- cmovz $t3, $a3
+ cmovb $t3, $a3
mov $a2, 8*2($r_ptr)
mov $a3, 8*3($r_ptr)
.type __ecp_nistz256_add_toq,\@abi-omnipotent
.align 32
__ecp_nistz256_add_toq:
+ xor $t4,$t4
add 8*0($b_ptr), $a0
adc 8*1($b_ptr), $a1
mov $a0, $t0
adc 8*2($b_ptr), $a2
adc 8*3($b_ptr), $a3
mov $a1, $t1
- sbb $t4, $t4
+ adc \$0, $t4
sub \$-1, $a0
mov $a2, $t2
sbb \$0, $a2
mov $a3, $t3
sbb $poly3, $a3
- test $t4, $t4
+ sbb \$0, $t4
- cmovz $t0, $a0
- cmovz $t1, $a1
+ cmovb $t0, $a0
+ cmovb $t1, $a1
mov $a0, 8*0($r_ptr)
- cmovz $t2, $a2
+ cmovb $t2, $a2
mov $a1, 8*1($r_ptr)
- cmovz $t3, $a3
+ cmovb $t3, $a3
mov $a2, 8*2($r_ptr)
mov $a3, 8*3($r_ptr)
.type __ecp_nistz256_mul_by_2q,\@abi-omnipotent
.align 32
__ecp_nistz256_mul_by_2q:
+ xor $t4, $t4
add $a0, $a0 # a0:a3+a0:a3
adc $a1, $a1
mov $a0, $t0
adc $a2, $a2
adc $a3, $a3
mov $a1, $t1
- sbb $t4, $t4
+ adc \$0, $t4
sub \$-1, $a0
mov $a2, $t2
sbb \$0, $a2
mov $a3, $t3
sbb $poly3, $a3
- test $t4, $t4
+ sbb \$0, $t4
- cmovz $t0, $a0
- cmovz $t1, $a1
+ cmovb $t0, $a0
+ cmovb $t1, $a1
mov $a0, 8*0($r_ptr)
- cmovz $t2, $a2
+ cmovb $t2, $a2
mov $a1, 8*1($r_ptr)
- cmovz $t3, $a3
+ cmovb $t3, $a3
mov $a2, 8*2($r_ptr)
mov $a3, 8*3($r_ptr)
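__ecp_nistz256_add_toq and __ecp_nistz256_mul_by_2q get the same treatment: build the sum with xor $t4,$t4 / add / adc ... / adc \$0,$t4, subtract the modulus with the sub/sbb chain, fold the borrow with sbb \$0,$t4, and select with cmovb. A compact C sketch of the two-operand case, again illustrative only (the function name is mine; the mul_by_2q variant is the same computation with b = a):

#include <stdint.h>

typedef unsigned __int128 u128;

static const uint64_t P256[4] = {
    0xffffffffffffffffULL, 0x00000000ffffffffULL,
    0x0000000000000000ULL, 0xffffffff00000001ULL
};

/* Illustrative model of __ecp_nistz256_add_toq: r = a + b mod p, a, b < p. */
static void p256_add_mod(uint64_t r[4], const uint64_t a[4], const uint64_t b[4])
{
    uint64_t sum[4], sub[4], carry = 0, borrow = 0;
    int i;

    /* xor $t4,$t4; add/adc ...; adc $0,$t4 */
    for (i = 0; i < 4; i++) {
        u128 s = (u128)a[i] + b[i] + carry;
        sum[i] = (uint64_t)s;
        carry  = (uint64_t)(s >> 64);
    }

    /* sub/sbb chain against p; sbb $0,$t4 */
    for (i = 0; i < 4; i++) {
        u128 d = (u128)P256[i] + borrow;
        borrow = sum[i] < d;
        sub[i] = sum[i] - (uint64_t)d;
    }
    borrow = carry < borrow;

    /* cmovb: the sum was already below p, keep it unreduced */
    for (i = 0; i < 4; i++)
        r[i] = borrow ? sum[i] : sub[i];
}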
#lea $Hsqr(%rsp), $r_ptr # 2*U1*H^2
#call __ecp_nistz256_mul_by_2 # ecp_nistz256_mul_by_2(Hsqr, U2);
+ xor $t4, $t4
add $acc0, $acc0 # a0:a3+a0:a3
lea $Rsqr(%rsp), $a_ptr
adc $acc1, $acc1
adc $acc2, $acc2
adc $acc3, $acc3
mov $acc1, $t1
- sbb $t4, $t4
+ adc \$0, $t4
sub \$-1, $acc0
mov $acc2, $t2
sbb \$0, $acc2
mov $acc3, $t3
sbb $poly3, $acc3
- test $t4, $t4
+ sbb \$0, $t4
- cmovz $t0, $acc0
+ cmovb $t0, $acc0
mov 8*0($a_ptr), $t0
- cmovz $t1, $acc1
+ cmovb $t1, $acc1
mov 8*1($a_ptr), $t1
- cmovz $t2, $acc2
+ cmovb $t2, $acc2
mov 8*2($a_ptr), $t2
- cmovz $t3, $acc3
+ cmovb $t3, $acc3
mov 8*3($a_ptr), $t3
call __ecp_nistz256_sub$x # p256_sub(res_x, Rsqr, Hsqr);
#lea $Hsqr(%rsp), $r_ptr # 2*U1*H^2
#call __ecp_nistz256_mul_by_2 # ecp_nistz256_mul_by_2(Hsqr, U2);
+ xor $t4, $t4
add $acc0, $acc0 # a0:a3+a0:a3
lea $Rsqr(%rsp), $a_ptr
adc $acc1, $acc1
adc $acc2, $acc2
adc $acc3, $acc3
mov $acc1, $t1
- sbb $t4, $t4
+ adc \$0, $t4
sub \$-1, $acc0
mov $acc2, $t2
sbb \$0, $acc2
mov $acc3, $t3
sbb $poly3, $acc3
- test $t4, $t4
+ sbb \$0, $t4
- cmovz $t0, $acc0
+ cmovb $t0, $acc0
mov 8*0($a_ptr), $t0
- cmovz $t1, $acc1
+ cmovb $t1, $acc1
mov 8*1($a_ptr), $t1
- cmovz $t2, $acc2
+ cmovb $t2, $acc2
mov 8*2($a_ptr), $t2
- cmovz $t3, $acc3
+ cmovb $t3, $acc3
mov 8*3($a_ptr), $t3
call __ecp_nistz256_sub$x # p256_sub(res_x, Rsqr, Hsqr);
sbb \$0, $a2
mov $a3, $t3
sbb $poly3, $a3
+ sbb \$0, $t4
- bt \$0, $t4
- cmovnc $t0, $a0
- cmovnc $t1, $a1
+ cmovb $t0, $a0
+ cmovb $t1, $a1
mov $a0, 8*0($r_ptr)
- cmovnc $t2, $a2
+ cmovb $t2, $a2
mov $a1, 8*1($r_ptr)
- cmovnc $t3, $a3
+ cmovb $t3, $a3
mov $a2, 8*2($r_ptr)
mov $a3, 8*3($r_ptr)
sbb \$0, $a2
mov $a3, $t3
sbb $poly3, $a3
+ sbb \$0, $t4
- bt \$0, $t4
- cmovnc $t0, $a0
- cmovnc $t1, $a1
+ cmovb $t0, $a0
+ cmovb $t1, $a1
mov $a0, 8*0($r_ptr)
- cmovnc $t2, $a2
+ cmovb $t2, $a2
mov $a1, 8*1($r_ptr)
- cmovnc $t3, $a3
+ cmovb $t3, $a3
mov $a2, 8*2($r_ptr)
mov $a3, 8*3($r_ptr)
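The last two hunks differ from the earlier ones only in what they replace: there the old code selected with bt \$0,$t4 / cmovnc, i.e. it looked at bit 0 of the accumulated carry word and, like the test/cmovz sites, never consulted the borrow of the subtraction of p. After the patch every selection site uses the same sbb \$0,$t4 / cmovb tail. Stated as predicates (a sketch with names of my own choosing):

#include <stdint.h>

/* Old sites ("bt $0,$t4; cmovnc"): keep the unreduced sum whenever bit 0 of
 * the carry word is clear, regardless of whether subtracting p borrowed. */
int keep_unreduced_old(uint64_t t4)
{
    return (t4 & 1) == 0;
}

/* New sites ("sbb $0,$t4; cmovb"): keep the unreduced sum exactly when the
 * full five-limb subtraction of p borrows, i.e. when the value was < p. */
int keep_unreduced_new(uint64_t carry, uint64_t sub_borrow)
{
    return carry < sub_borrow;
}

keep_unreduced_new is the condition cmovb acts on: the saved pre-subtraction limbs are written back exactly when the five-limb value was below p, so every output stays fully reduced.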