.type ecp_nistz256_mul_by_2,\@function,2
.align 64
ecp_nistz256_mul_by_2:
+.cfi_startproc
# Prologue: save the two callee-saved scratch registers; each stack-pointer
# change is mirrored by a CFI annotation so unwinders track the frame.
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
.Lmul_by_2_body:
mov 8*0($a_ptr), $a0
# NOTE(review): this is diff-hunk residue -- the doubling/reduction body
# between the first load above and the final store below is elided here.
mov $a3, 8*3($r_ptr)
# Epilogue: %r13 was pushed last, so it sits at 0(%rsp); %r12 at 8(%rsp).
mov 0(%rsp),%r13
+.cfi_restore %r13
mov 8(%rsp),%r12
+.cfi_restore %r12
lea 16(%rsp),%rsp
+.cfi_adjust_cfa_offset -16
.Lmul_by_2_epilogue:
ret
+.cfi_endproc
.size ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2
################################################################################
.type ecp_nistz256_div_by_2,\@function,2
.align 32
ecp_nistz256_div_by_2:
+.cfi_startproc
# Save callee-saved %r12/%r13 with matching CFI annotations.
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
.Ldiv_by_2_body:
mov 8*0($a_ptr), $a0
# NOTE(review): halving body elided in this diff hunk.
mov $a3, 8*3($r_ptr)
# Restore in reverse push order and drop the 16 bytes from the prologue.
mov 0(%rsp),%r13
+.cfi_restore %r13
mov 8(%rsp),%r12
+.cfi_restore %r12
lea 16(%rsp),%rsp
+.cfi_adjust_cfa_offset -16
.Ldiv_by_2_epilogue:
ret
+.cfi_endproc
.size ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2
################################################################################
.type ecp_nistz256_mul_by_3,\@function,2
.align 32
ecp_nistz256_mul_by_3:
+.cfi_startproc
# Same two-register prologue as the other small helpers; CFI kept in sync.
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
.Lmul_by_3_body:
mov 8*0($a_ptr), $a0
# NOTE(review): tripling body elided in this diff hunk.
mov $a3, 8*3($r_ptr)
mov 0(%rsp),%r13
+.cfi_restore %r13
mov 8(%rsp),%r12
+.cfi_restore %r12
lea 16(%rsp),%rsp
+.cfi_adjust_cfa_offset -16
.Lmul_by_3_epilogue:
ret
+.cfi_endproc
.size ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3
################################################################################
.type ecp_nistz256_add,\@function,3
.align 32
ecp_nistz256_add:
+.cfi_startproc
# Three-argument entry (res, a, b); saves %r12/%r13 with CFI annotations.
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
.Ladd_body:
mov 8*0($a_ptr), $a0
# NOTE(review): addition/reduction body elided in this diff hunk.
mov $a3, 8*3($r_ptr)
mov 0(%rsp),%r13
+.cfi_restore %r13
mov 8(%rsp),%r12
+.cfi_restore %r12
lea 16(%rsp),%rsp
+.cfi_adjust_cfa_offset -16
.Ladd_epilogue:
ret
+.cfi_endproc
.size ecp_nistz256_add,.-ecp_nistz256_add
################################################################################
.type ecp_nistz256_sub,\@function,3
.align 32
ecp_nistz256_sub:
+.cfi_startproc
# Three-argument entry (res, a, b); prologue mirrors ecp_nistz256_add.
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
.Lsub_body:
mov 8*0($a_ptr), $a0
# NOTE(review): subtraction body elided in this diff hunk.
mov $a3, 8*3($r_ptr)
mov 0(%rsp),%r13
+.cfi_restore %r13
mov 8(%rsp),%r12
+.cfi_restore %r12
lea 16(%rsp),%rsp
+.cfi_adjust_cfa_offset -16
.Lsub_epilogue:
ret
+.cfi_endproc
.size ecp_nistz256_sub,.-ecp_nistz256_sub
################################################################################
.type ecp_nistz256_neg,\@function,2
.align 32
ecp_nistz256_neg:
+.cfi_startproc
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
.Lneg_body:
# Negation starts from zero ($a0 cleared) rather than a load from $a_ptr.
xor $a0, $a0
# NOTE(review): the 0 - a computation is elided in this diff hunk.
mov $a3, 8*3($r_ptr)
mov 0(%rsp),%r13
+.cfi_restore %r13
mov 8(%rsp),%r12
+.cfi_restore %r12
lea 16(%rsp),%rsp
+.cfi_adjust_cfa_offset -16
.Lneg_epilogue:
ret
+.cfi_endproc
.size ecp_nistz256_neg,.-ecp_nistz256_neg
___
}
.type ecp_nistz256_mul_mont,\@function,3
.align 32
ecp_nistz256_mul_mont:
+.cfi_startproc
___
$code.=<<___ if ($addx);
# Capability mask (0x80100) loaded for the ADX/BMI2 dispatch below;
# emitted only when $addx support is compiled in.
mov \$0x80100, %ecx
$code.=<<___;
.Lmul_mont:
# Prologue: six callee-saved GPRs saved, each with a CFI annotation.
push %rbp
+.cfi_push %rbp
push %rbx
+.cfi_push %rbx
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lmul_body:
___
$code.=<<___ if ($addx);
$code.=<<___;
# NOTE(review): the heredocs above are unterminated in this view -- the
# Montgomery-multiplication body and ADX branch are elided (diff residue).
.Lmul_mont_done:
# Epilogue: restore in reverse push order (%r15 was pushed last, offset 0).
mov 0(%rsp),%r15
+.cfi_restore %r15
mov 8(%rsp),%r14
+.cfi_restore %r14
mov 16(%rsp),%r13
+.cfi_restore %r13
mov 24(%rsp),%r12
+.cfi_restore %r12
mov 32(%rsp),%rbx
+.cfi_restore %rbx
mov 40(%rsp),%rbp
+.cfi_restore %rbp
lea 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
.Lmul_epilogue:
ret
+.cfi_endproc
.size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
.type __ecp_nistz256_mul_montq,\@abi-omnipotent
.type ecp_nistz256_sqr_mont,\@function,2
.align 32
ecp_nistz256_sqr_mont:
+.cfi_startproc
___
$code.=<<___ if ($addx);
# Same 0x80100 capability mask as mul_mont, for the ADX dispatch.
mov \$0x80100, %ecx
___
$code.=<<___;
# Prologue mirrors ecp_nistz256_mul_mont: six callee-saved pushes + CFI.
push %rbp
+.cfi_push %rbp
push %rbx
+.cfi_push %rbx
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lsqr_body:
___
$code.=<<___ if ($addx);
$code.=<<___;
# NOTE(review): squaring body and ADX branch elided (diff-hunk residue;
# the two heredocs above are unterminated in this view).
.Lsqr_mont_done:
# Epilogue identical in shape to .Lmul_mont_done.
mov 0(%rsp),%r15
+.cfi_restore %r15
mov 8(%rsp),%r14
+.cfi_restore %r14
mov 16(%rsp),%r13
+.cfi_restore %r13
mov 24(%rsp),%r12
+.cfi_restore %r12
mov 32(%rsp),%rbx
+.cfi_restore %rbx
mov 40(%rsp),%rbp
+.cfi_restore %rbp
lea 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
.Lsqr_epilogue:
ret
+.cfi_endproc
.size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
.type __ecp_nistz256_sqr_montq,\@abi-omnipotent
# NOTE(review): interior of an ADX squaring routine whose label is outside
# this view (presumably __ecp_nistz256_sqr_montx). The +/- lines are an
# upstream rewrite that keeps .Lpoly[3] resident in %rdx across all four
# reduction steps, so each step multiplies by the accumulator operand
# instead of reloading the modulus word -- confirm against the full file.
adox $t1, $acc5
.byte 0x67,0x67
mulx %rdx, $t0, $t4
- mov $acc0, %rdx
+ mov .Lpoly+8*3(%rip), %rdx
adox $t0, $acc6
shlx $a_ptr, $acc0, $t0
adox $t4, $acc7
shrx $a_ptr, $acc0, $t4
- mov .Lpoly+8*3(%rip), $t1
+ mov %rdx,$t1
# reduction step 1
add $t0, $acc1
adc $t4, $acc2
- mulx $t1, $t0, $acc0
- mov $acc1, %rdx
+ mulx $acc0, $t0, $acc0
adc $t0, $acc3
shlx $a_ptr, $acc1, $t0
adc \$0, $acc0
add $t0, $acc2
adc $t4, $acc3
- mulx $t1, $t0, $acc1
- mov $acc2, %rdx
+ mulx $acc1, $t0, $acc1
adc $t0, $acc0
shlx $a_ptr, $acc2, $t0
adc \$0, $acc1
add $t0, $acc3
adc $t4, $acc0
- mulx $t1, $t0, $acc2
- mov $acc3, %rdx
+ mulx $acc2, $t0, $acc2
adc $t0, $acc1
shlx $a_ptr, $acc3, $t0
adc \$0, $acc2
add $t0, $acc0
adc $t4, $acc1
- mulx $t1, $t0, $acc3
+ mulx $acc3, $t0, $acc3
adc $t0, $acc2
adc \$0, $acc3
# NOTE(review): first accumulation switched from adc to add -- the carry
# flag is not guaranteed clear at this point, so the old "xor ... # cf=0"
# assumption is dropped; verify against the upstream commit.
- xor $t3, $t3 # cf=0
- adc $acc0, $acc4 # accumulate upper half
+ xor $t3, $t3
+ add $acc0, $acc4 # accumulate upper half
mov .Lpoly+8*1(%rip), $a_ptr
adc $acc1, $acc5
mov $acc4, $acc0
mov $acc5, $acc1
adc \$0, $t3
# Conditional subtract of .Lpoly: the rewrite starts the borrow chain with
# sub (no stale CF) instead of clearing CF with xor and using sbb.
- xor %eax, %eax # cf=0
- sbb \$-1, $acc4 # .Lpoly[0]
+ sub \$-1, $acc4 # .Lpoly[0]
mov $acc6, $acc2
sbb $a_ptr, $acc5 # .Lpoly[1]
sbb \$0, $acc6 # .Lpoly[2]
.type ecp_nistz256_from_mont,\@function,2
.align 32
ecp_nistz256_from_mont:
+.cfi_startproc
# Converts out of Montgomery form; same %r12/%r13 prologue as the helpers.
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
.Lfrom_body:
mov 8*0($in_ptr), %rax
# NOTE(review): reduction body elided in this diff hunk.
mov $acc3, 8*3($r_ptr)
mov 0(%rsp),%r13
+.cfi_restore %r13
mov 8(%rsp),%r12
+.cfi_restore %r12
lea 16(%rsp),%rsp
+.cfi_adjust_cfa_offset -16
.Lfrom_epilogue:
ret
+.cfi_endproc
.size ecp_nistz256_from_mont,.-ecp_nistz256_from_mont
___
}
.type ecp_nistz256_point_double,\@function,2
.align 32
ecp_nistz256_point_double:
+.cfi_startproc
___
$code.=<<___ if ($addx);
mov \$0x80100, %ecx
# NOTE(review): dispatch code between the generic entry and the ADX entry
# (ecp_nistz256_point_doublex) is elided in this diff hunk.
.type ecp_nistz256_point_doublex,\@function,2
.align 32
ecp_nistz256_point_doublex:
+.cfi_startproc
.Lpoint_doublex:
___
}
$code.=<<___;
# Shared prologue ($x/$sfx select plain vs ADX flavour): six callee-saved
# pushes (48 bytes) plus 32*5+8 bytes of stack temporaries, all CFI-tracked.
push %rbp
+.cfi_push %rbp
push %rbx
+.cfi_push %rbx
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
sub \$32*5+8, %rsp
+.cfi_adjust_cfa_offset 32*5+8
.Lpoint_double${x}_body:
.Lpoint_double_shortcut$x:
# NOTE(review): the point-doubling sequence is elided; only the final
# subtraction call survives in this hunk.
call __ecp_nistz256_sub_from$x # p256_sub(res_y, S, res_y);
# Epilogue: 32*5+56 = 48 (pushes) + 32*5+8 (locals), i.e. %rsi points at
# the pre-push stack top; the CFA is rebased onto %rsi so the restores at
# negative offsets stay describable to the unwinder.
lea 32*5+56(%rsp), %rsi
+.cfi_def_cfa %rsi,8
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbx
+.cfi_restore %rbx
mov -8(%rsi),%rbp
+.cfi_restore %rbp
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lpoint_double${x}_epilogue:
ret
+.cfi_endproc
.size ecp_nistz256_point_double$sfx,.-ecp_nistz256_point_double$sfx
___
}
.type ecp_nistz256_point_add,\@function,3
.align 32
ecp_nistz256_point_add:
+.cfi_startproc
___
$code.=<<___ if ($addx);
mov \$0x80100, %ecx
# NOTE(review): dispatch code before the ADX entry is elided in this hunk.
.type ecp_nistz256_point_addx,\@function,3
.align 32
ecp_nistz256_point_addx:
+.cfi_startproc
.Lpoint_addx:
___
}
$code.=<<___;
# Shared prologue: six callee-saved pushes plus a 32*18+8-byte frame for
# the point-addition temporaries, each adjustment CFI-annotated.
push %rbp
+.cfi_push %rbp
push %rbx
+.cfi_push %rbx
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
sub \$32*18+8, %rsp
+.cfi_adjust_cfa_offset 32*18+8
.Lpoint_add${x}_body:
movdqu 0x00($a_ptr), %xmm0 # copy *(P256_POINT *)$a_ptr
# NOTE(review): the addition body between the first copy and .Ladd_done$x
# is elided in this diff hunk.
.Ladd_done$x:
# Epilogue: rebase the CFA onto %rsi (pre-push stack top = 48 + 32*18+8
# above the current %rsp) before restoring the callee-saved registers.
lea 32*18+56(%rsp), %rsi
+.cfi_def_cfa %rsi,8
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbx
+.cfi_restore %rbx
mov -8(%rsi),%rbp
+.cfi_restore %rbp
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lpoint_add${x}_epilogue:
ret
+.cfi_endproc
.size ecp_nistz256_point_add$sfx,.-ecp_nistz256_point_add$sfx
___
}
.type ecp_nistz256_point_add_affine,\@function,3
.align 32
ecp_nistz256_point_add_affine:
+.cfi_startproc
___
$code.=<<___ if ($addx);
mov \$0x80100, %ecx
# NOTE(review): dispatch code before the ADX entry is elided in this hunk.
.type ecp_nistz256_point_add_affinex,\@function,3
.align 32
ecp_nistz256_point_add_affinex:
+.cfi_startproc
.Lpoint_add_affinex:
___
}
$code.=<<___;
# Shared prologue: six callee-saved pushes plus a 32*15+8-byte frame for
# mixed (Jacobian + affine) addition temporaries, all CFI-tracked.
push %rbp
+.cfi_push %rbp
push %rbx
+.cfi_push %rbx
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
sub \$32*15+8, %rsp
+.cfi_adjust_cfa_offset 32*15+8
.Ladd_affine${x}_body:
movdqu 0x00($a_ptr), %xmm0 # copy *(P256_POINT *)$a_ptr
# NOTE(review): the mixed-addition body is elided; only the final result
# store survives in this hunk.
movdqu %xmm3, 0x30($r_ptr)
# Epilogue: same CFA-rebase pattern as point_add, with the 32*15+8 frame.
lea 32*15+56(%rsp), %rsi
+.cfi_def_cfa %rsi,8
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbx
+.cfi_restore %rbx
mov -8(%rsi),%rbp
+.cfi_restore %rbp
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Ladd_affine${x}_epilogue:
ret
+.cfi_endproc
.size ecp_nistz256_point_add_affine$sfx,.-ecp_nistz256_point_add_affine$sfx
___
}
# NOTE(review): interior of a Win64 SEH unwind handler whose label lies
# outside this view. %rax is advanced to the saved-register area using
# HandlerData[2] as the frame size.
mov 8(%r11),%r10d # HandlerData[2]
lea (%rax,%r10),%rax
# Fix: restore order now matches the prologues above, which push %rbp
# first (so it ends up at -8) and %rbx second (at -16); the old order
# swapped the two registers during unwinding.
- mov -8(%rax),%rbx
- mov -16(%rax),%rbp
+ mov -8(%rax),%rbp
+ mov -16(%rax),%rbx
mov -24(%rax),%r12
mov -32(%rax),%r13
mov -40(%rax),%r14