From 76e624a003db22db2d99ece04a15e20fe44c1fbe Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Wed, 8 Feb 2017 10:12:28 +0100 Subject: [PATCH 1/1] bn/asm/x86_64*: add DWARF CFI directives. Reviewed-by: Rich Salz --- crypto/bn/asm/rsaz-avx2.pl | 40 ++++++++++++ crypto/bn/asm/rsaz-x86_64.pl | 85 ++++++++++++++++++++++++++ crypto/bn/asm/x86_64-gf2m.pl | 18 ++++++ crypto/bn/asm/x86_64-mont.pl | 72 ++++++++++++++++++++++ crypto/bn/asm/x86_64-mont5.pl | 111 +++++++++++++++++++++++++++++++++- 5 files changed, 324 insertions(+), 2 deletions(-) diff --git a/crypto/bn/asm/rsaz-avx2.pl b/crypto/bn/asm/rsaz-avx2.pl index 6d23fefd2c..f45c2149fc 100755 --- a/crypto/bn/asm/rsaz-avx2.pl +++ b/crypto/bn/asm/rsaz-avx2.pl @@ -168,13 +168,21 @@ $code.=<<___; .type rsaz_1024_sqr_avx2,\@function,5 .align 64 rsaz_1024_sqr_avx2: # 702 cycles, 14% faster than rsaz_1024_mul_avx2 +.cfi_startproc lea (%rsp), %rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 vzeroupper ___ $code.=<<___ if ($win64); @@ -193,6 +201,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov %rax,%rbp +.cfi_def_cfa_register %rbp mov %rdx, $np # reassigned argument sub \$$FrameSize, %rsp mov $np, $tmp @@ -825,6 +834,7 @@ $code.=<<___; vzeroall mov %rbp, %rax +.cfi_def_cfa_register %rax ___ $code.=<<___ if ($win64); .Lsqr_1024_in_tail: @@ -841,14 +851,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lsqr_1024_epilogue: ret +.cfi_endproc .size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2 ___ } @@ -901,13 +919,21 @@ $code.=<<___; .type rsaz_1024_mul_avx2,\@function,5 .align 64 rsaz_1024_mul_avx2: +.cfi_startproc lea (%rsp), %rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 ___ $code.=<<___ if ($win64); vzeroupper @@ -926,6 +952,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov %rax,%rbp +.cfi_def_cfa_register %rbp vzeroall mov %rdx, $bp # reassigned argument sub \$64,%rsp @@ -1459,6 +1486,7 @@ $code.=<<___; vzeroupper mov %rbp, %rax +.cfi_def_cfa_register %rax ___ $code.=<<___ if ($win64); .Lmul_1024_in_tail: @@ -1475,14 +1503,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lmul_1024_epilogue: ret +.cfi_endproc .size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2 ___ } @@ -1601,8 +1637,10 @@ rsaz_1024_scatter5_avx2: .type rsaz_1024_gather5_avx2,\@abi-omnipotent .align 32 rsaz_1024_gather5_avx2: +.cfi_startproc vzeroupper mov %rsp,%r11 +.cfi_def_cfa_register %r11 ___ $code.=<<___ if ($win64); lea -0x88(%rsp),%rax @@ -1743,7 +1781,9 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea (%r11),%rsp +.cfi_def_cfa_register %rsp ret +.cfi_endproc .LSEH_end_rsaz_1024_gather5: .size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2 ___ diff --git a/crypto/bn/asm/rsaz-x86_64.pl b/crypto/bn/asm/rsaz-x86_64.pl index 
7bcfafe8dd..1b88026f08 100755 --- a/crypto/bn/asm/rsaz-x86_64.pl +++ b/crypto/bn/asm/rsaz-x86_64.pl @@ -138,14 +138,22 @@ $code.=<<___; .type rsaz_512_sqr,\@function,5 .align 32 rsaz_512_sqr: # 25-29% faster than rsaz_512_mul +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 subq \$128+24, %rsp +.cfi_adjust_cfa_offset 128+24 .Lsqr_body: movq $mod, %rbp # common argument movq ($inp), %rdx @@ -800,15 +808,24 @@ ___ $code.=<<___; leaq 128+24+48(%rsp), %rax +.cfi_def_cfa %rax,8 movq -48(%rax), %r15 +.cfi_restore %r15 movq -40(%rax), %r14 +.cfi_restore %r14 movq -32(%rax), %r13 +.cfi_restore %r13 movq -24(%rax), %r12 +.cfi_restore %r12 movq -16(%rax), %rbp +.cfi_restore %rbp movq -8(%rax), %rbx +.cfi_restore %rbx leaq (%rax), %rsp +.cfi_def_cfa_register %rsp .Lsqr_epilogue: ret +.cfi_endproc .size rsaz_512_sqr,.-rsaz_512_sqr ___ } @@ -819,14 +836,22 @@ $code.=<<___; .type rsaz_512_mul,\@function,5 .align 32 rsaz_512_mul: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 subq \$128+24, %rsp +.cfi_adjust_cfa_offset 128+24 .Lmul_body: movq $out, %xmm0 # off-load arguments movq $mod, %xmm1 @@ -896,15 +921,24 @@ $code.=<<___; call __rsaz_512_subtract leaq 128+24+48(%rsp), %rax +.cfi_def_cfa %rax,8 movq -48(%rax), %r15 +.cfi_restore %r15 movq -40(%rax), %r14 +.cfi_restore %r14 movq -32(%rax), %r13 +.cfi_restore %r13 movq -24(%rax), %r12 +.cfi_restore %r12 movq -16(%rax), %rbp +.cfi_restore %rbp movq -8(%rax), %rbx +.cfi_restore %rbx leaq (%rax), %rsp +.cfi_def_cfa_register %rsp .Lmul_epilogue: ret +.cfi_endproc .size rsaz_512_mul,.-rsaz_512_mul ___ } @@ -915,14 +949,22 @@ $code.=<<___; .type rsaz_512_mul_gather4,\@function,6 .align 32 rsaz_512_mul_gather4: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 subq \$`128+24+($win64?0xb0:0)`, %rsp +.cfi_adjust_cfa_offset `128+24+($win64?0xb0:0)` ___ $code.=<<___ if ($win64); movaps %xmm6,0xa0(%rsp) @@ -1348,15 +1390,24 @@ $code.=<<___ if ($win64); lea 0xb0(%rax),%rax ___ $code.=<<___; +.cfi_def_cfa %rax,8 movq -48(%rax), %r15 +.cfi_restore %r15 movq -40(%rax), %r14 +.cfi_restore %r14 movq -32(%rax), %r13 +.cfi_restore %r13 movq -24(%rax), %r12 +.cfi_restore %r12 movq -16(%rax), %rbp +.cfi_restore %rbp movq -8(%rax), %rbx +.cfi_restore %rbx leaq (%rax), %rsp +.cfi_def_cfa_register %rsp .Lmul_gather4_epilogue: ret +.cfi_endproc .size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4 ___ } @@ -1367,15 +1418,23 @@ $code.=<<___; .type rsaz_512_mul_scatter4,\@function,6 .align 32 rsaz_512_mul_scatter4: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 mov $pwr, $pwr subq \$128+24, %rsp +.cfi_adjust_cfa_offset 128+24 .Lmul_scatter4_body: leaq ($tbl,$pwr,8), $tbl movq $out, %xmm0 # off-load arguments @@ -1458,15 +1517,24 @@ $code.=<<___; movq %r15, 128*7($inp) leaq 128+24+48(%rsp), %rax +.cfi_def_cfa %rax,8 movq -48(%rax), %r15 +.cfi_restore %r15 movq -40(%rax), %r14 +.cfi_restore %r14 movq -32(%rax), %r13 +.cfi_restore %r13 movq -24(%rax), %r12 +.cfi_restore %r12 movq -16(%rax), %rbp +.cfi_restore %rbp movq -8(%rax), %rbx +.cfi_restore %rbx leaq (%rax), %rsp +.cfi_def_cfa_register %rsp 
.Lmul_scatter4_epilogue: ret +.cfi_endproc .size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4 ___ } @@ -1477,14 +1545,22 @@ $code.=<<___; .type rsaz_512_mul_by_one,\@function,4 .align 32 rsaz_512_mul_by_one: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 subq \$128+24, %rsp +.cfi_adjust_cfa_offset 128+24 .Lmul_by_one_body: ___ $code.=<<___ if ($addx); @@ -1539,15 +1615,24 @@ $code.=<<___; movq %r15, 56($out) leaq 128+24+48(%rsp), %rax +.cfi_def_cfa %rax,8 movq -48(%rax), %r15 +.cfi_restore %r15 movq -40(%rax), %r14 +.cfi_restore %r14 movq -32(%rax), %r13 +.cfi_restore %r13 movq -24(%rax), %r12 +.cfi_restore %r12 movq -16(%rax), %rbp +.cfi_restore %rbp movq -8(%rax), %rbx +.cfi_restore %rbx leaq (%rax), %rsp +.cfi_def_cfa_register %rsp .Lmul_by_one_epilogue: ret +.cfi_endproc .size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one ___ } diff --git a/crypto/bn/asm/x86_64-gf2m.pl b/crypto/bn/asm/x86_64-gf2m.pl index d237c1e3d2..0181f52ca4 100644 --- a/crypto/bn/asm/x86_64-gf2m.pl +++ b/crypto/bn/asm/x86_64-gf2m.pl @@ -54,7 +54,9 @@ $code.=<<___; .type _mul_1x1,\@abi-omnipotent .align 16 _mul_1x1: +.cfi_startproc sub \$128+8,%rsp +.cfi_adjust_cfa_offset 128+8 mov \$-1,$a1 lea ($a,$a),$i0 shr \$3,$a1 @@ -160,8 +162,10 @@ $code.=<<___; xor $i1,$hi add \$128+8,%rsp +.cfi_adjust_cfa_offset -128-8 ret .Lend_mul_1x1: +.cfi_endproc .size _mul_1x1,.-_mul_1x1 ___ @@ -174,6 +178,7 @@ $code.=<<___; .type bn_GF2m_mul_2x2,\@abi-omnipotent .align 16 bn_GF2m_mul_2x2: +.cfi_startproc mov %rsp,%rax mov OPENSSL_ia32cap_P(%rip),%r10 bt \$33,%r10 @@ -211,6 +216,7 @@ $code.=<<___; .align 16 .Lvanilla_mul_2x2: lea -8*17(%rsp),%rsp +.cfi_adjust_cfa_offset 8*17 ___ $code.=<<___ if ($win64); mov `8*17+40`(%rsp),$b0 @@ -219,10 +225,15 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov %r14,8*10(%rsp) +.cfi_rel_offset %r14,8*10 mov %r13,8*11(%rsp) +.cfi_rel_offset %r13,8*11 mov %r12,8*12(%rsp) +.cfi_rel_offset %r12,8*12 mov %rbp,8*13(%rsp) +.cfi_rel_offset %rbp,8*13 mov %rbx,8*14(%rsp) +.cfi_rel_offset %rbx,8*14 .Lbody_mul_2x2: mov $rp,32(%rsp) # save the arguments mov $a1,40(%rsp) @@ -270,10 +281,15 @@ $code.=<<___; mov $lo,8(%rbp) mov 8*10(%rsp),%r14 +.cfi_restore %r14 mov 8*11(%rsp),%r13 +.cfi_restore %r13 mov 8*12(%rsp),%r12 +.cfi_restore %r12 mov 8*13(%rsp),%rbp +.cfi_restore %rbp mov 8*14(%rsp),%rbx +.cfi_restore %rbx ___ $code.=<<___ if ($win64); mov 8*15(%rsp),%rdi @@ -281,9 +297,11 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea 8*17(%rsp),%rsp +.cfi_adjust_cfa_offset -8*17 .Lepilogue_mul_2x2: ret .Lend_mul_2x2: +.cfi_endproc .size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 .asciz "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by " .align 16 diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl index 7b5e88547b..cf3daa2bb5 100755 --- a/crypto/bn/asm/x86_64-mont.pl +++ b/crypto/bn/asm/x86_64-mont.pl @@ -104,8 +104,10 @@ $code=<<___; .type bn_mul_mont,\@function,6 .align 16 bn_mul_mont: +.cfi_startproc mov ${num}d,${num}d mov %rsp,%rax +.cfi_def_cfa_register %rax test \$3,${num}d jnz .Lmul_enter cmp \$8,${num}d @@ -124,11 +126,17 @@ $code.=<<___; .align 16 .Lmul_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 neg $num mov %rsp,%r11 @@ -161,6 +169,7 @@ $code.=<<___; .Lmul_page_walk_done: mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp +.cfi_cfa_expression 
%rsp+8,$num,8,mul,plus,deref,+8 .Lmul_body: mov $bp,%r12 # reassign $bp ___ @@ -331,16 +340,25 @@ $code.=<<___; jnz .Lcopy mov 8(%rsp,$num,8),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmul_epilogue: ret +.cfi_endproc .size bn_mul_mont,.-bn_mul_mont ___ {{{ @@ -350,8 +368,10 @@ $code.=<<___; .type bn_mul4x_mont,\@function,6 .align 16 bn_mul4x_mont: +.cfi_startproc mov ${num}d,${num}d mov %rsp,%rax +.cfi_def_cfa_register %rax .Lmul4x_enter: ___ $code.=<<___ if ($addx); @@ -361,11 +381,17 @@ $code.=<<___ if ($addx); ___ $code.=<<___; push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 neg $num mov %rsp,%r11 @@ -389,6 +415,7 @@ $code.=<<___; .Lmul4x_page_walk_done: mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp +.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8 .Lmul4x_body: mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp mov %rdx,%r12 # reassign $bp @@ -767,16 +794,25 @@ ___ } $code.=<<___; mov 8(%rsp,$num,8),%rsi # restore %rsp +.cfi_def_cfa %rsi, 8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmul4x_epilogue: ret +.cfi_endproc .size bn_mul4x_mont,.-bn_mul4x_mont ___ }}} @@ -804,14 +840,22 @@ $code.=<<___; .type bn_sqr8x_mont,\@function,6 .align 32 bn_sqr8x_mont: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax .Lsqr8x_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lsqr8x_prologue: mov ${num}d,%r10d @@ -867,6 +911,7 @@ bn_sqr8x_mont: mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lsqr8x_body: movq $nptr, %xmm2 # save pointer to modulus @@ -936,6 +981,7 @@ $code.=<<___; pxor %xmm0,%xmm0 pshufd \$0,%xmm1,%xmm1 mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 jmp .Lsqr8x_cond_copy .align 32 @@ -965,14 +1011,22 @@ $code.=<<___; mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lsqr8x_epilogue: ret +.cfi_endproc .size bn_sqr8x_mont,.-bn_sqr8x_mont ___ }}} @@ -984,14 +1038,22 @@ $code.=<<___; .type bn_mulx4x_mont,\@function,6 .align 32 bn_mulx4x_mont: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax .Lmulx4x_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lmulx4x_prologue: shl \$3,${num}d # convert $num to bytes @@ -1037,6 +1099,7 @@ bn_mulx4x_mont: mov $n0, 24(%rsp) # save *n0 mov $rp, 32(%rsp) # save $rp mov %rax,40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 mov $num,48(%rsp) # inner counter jmp .Lmulx4x_body @@ -1286,6 +1349,7 @@ $code.=<<___; pxor %xmm0,%xmm0 pshufd \$0,%xmm1,%xmm1 mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 
jmp .Lmulx4x_cond_copy .align 32 @@ -1315,14 +1379,22 @@ $code.=<<___; mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmulx4x_epilogue: ret +.cfi_endproc .size bn_mulx4x_mont,.-bn_mulx4x_mont ___ }}} diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl index 226f4360d6..1bf994286d 100755 --- a/crypto/bn/asm/x86_64-mont5.pl +++ b/crypto/bn/asm/x86_64-mont5.pl @@ -93,8 +93,10 @@ $code=<<___; .type bn_mul_mont_gather5,\@function,6 .align 64 bn_mul_mont_gather5: +.cfi_startproc mov ${num}d,${num}d mov %rsp,%rax +.cfi_def_cfa_register %rax test \$7,${num}d jnz .Lmul_enter ___ $code.=<<___ if ($addx); @@ -108,11 +110,17 @@ $code.=<<___; .Lmul_enter: movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 neg $num mov %rsp,%r11 @@ -145,6 +153,7 @@ $code.=<<___; lea .Linc(%rip),%r10 mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp +.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8 .Lmul_body: lea 128($bp),%r12 # reassign $bp (+size optimization) @@ -431,17 +440,26 @@ $code.=<<___; jnz .Lcopy mov 8(%rsp,$num,8),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmul_epilogue: ret +.cfi_endproc .size bn_mul_mont_gather5,.-bn_mul_mont_gather5 ___ {{{ $code.=<<___; .type bn_mul4x_mont_gather5,\@function,6 .align 32 bn_mul4x_mont_gather5: +.cfi_startproc .byte 0x67 mov %rsp,%rax +.cfi_def_cfa_register %rax .Lmul4x_enter: ___ $code.=<<___ if ($addx); @@ -462,11 +482,17 @@ $code.=<<___ if ($addx); ___ $code.=<<___; push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lmul4x_prologue: .byte 0x67 @@ -522,22 +548,32 @@ $code.=<<___; neg $num mov %rax,40(%rsp) +.cfi_cfa_expression %rsp+40,deref,+8 .Lmul4x_body: call mul4x_internal mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmul4x_epilogue: ret +.cfi_endproc .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 .type mul4x_internal,\@abi-omnipotent @@ -1061,7 +1097,9 @@ $code.=<<___; .type bn_power5,\@function,6 .align 32 bn_power5: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax ___ $code.=<<___ if ($addx); mov OPENSSL_ia32cap_P+8(%rip),%r11d @@ -1071,11 +1109,17 @@ $code.=<<___ if ($addx); ___ $code.=<<___; push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lpower5_prologue: shl \$3,${num}d # convert $num to bytes @@ -1140,6 +1184,7 @@ $code.=<<___; # mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lpower5_body: movq $rptr,%xmm1 # save
$rptr, used in sqr8x movq $nptr,%xmm2 # save $nptr @@ -1166,16 +1211,25 @@ $code.=<<___; call mul4x_internal mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lpower5_epilogue: ret +.cfi_endproc .size bn_power5,.-bn_power5 .globl bn_sqr8x_internal @@ -2055,14 +2109,22 @@ bn_from_montgomery: .type bn_from_mont8x,\@function,6 .align 32 bn_from_mont8x: +.cfi_startproc .byte 0x67 mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lfrom_prologue: shl \$3,${num}d # convert $num to bytes @@ -2127,6 +2189,7 @@ bn_from_mont8x: # mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lfrom_body: mov $num,%r11 lea 48(%rsp),%rax @@ -2170,7 +2233,6 @@ $code.=<<___ if ($addx); pxor %xmm0,%xmm0 lea 48(%rsp),%rax - mov 40(%rsp),%rsi # restore %rsp jmp .Lfrom_mont_zero .align 32 @@ -2182,11 +2244,12 @@ $code.=<<___; pxor %xmm0,%xmm0 lea 48(%rsp),%rax - mov 40(%rsp),%rsi # restore %rsp jmp .Lfrom_mont_zero .align 32 .Lfrom_mont_zero: + mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 movdqa %xmm0,16*0(%rax) movdqa %xmm0,16*1(%rax) movdqa %xmm0,16*2(%rax) @@ -2197,14 +2260,22 @@ $code.=<<___; mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lfrom_epilogue: ret +.cfi_endproc .size bn_from_mont8x,.-bn_from_mont8x ___ } @@ -2217,14 +2288,22 @@ $code.=<<___; .type bn_mulx4x_mont_gather5,\@function,6 .align 32 bn_mulx4x_mont_gather5: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax .Lmulx4x_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lmulx4x_prologue: shl \$3,${num}d # convert $num to bytes @@ -2290,21 +2369,31 @@ bn_mulx4x_mont_gather5: # mov $n0, 32(%rsp) # save *n0 mov %rax,40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lmulx4x_body: call mulx4x_internal mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmulx4x_epilogue: ret +.cfi_endproc .size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 .type mulx4x_internal,\@abi-omnipotent @@ -2682,14 +2771,22 @@ $code.=<<___; .type bn_powerx5,\@function,6 .align 32 bn_powerx5: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax .Lpowerx5_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lpowerx5_prologue: shl \$3,${num}d # convert $num to bytes @@ -2761,6 +2858,7 @@ bn_powerx5: movq $bptr,%xmm4 mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lpowerx5_body: call 
__bn_sqrx8x_internal @@ -2783,17 +2881,26 @@ bn_powerx5: call mulx4x_internal mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lpowerx5_epilogue: ret +.cfi_endproc .size bn_powerx5,.-bn_powerx5 .globl bn_sqrx8x_internal -- 2.34.1
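
For reference, the shorthand used throughout this patch (.cfi_push, .cfi_restore, .cfi_adjust_cfa_offset, .cfi_cfa_expression) is perlasm notation that OpenSSL's x86_64 translator script rewrites into standard DWARF call-frame directives before the assembler sees the code. The listing below is a minimal standalone sketch, not taken from the patch, of the equivalent hand-written GNU-as pattern for one callee-saved register; it assumes the SysV AMD64 ABI, and the function name cfi_demo is invented purely for illustration.

	.text
	.globl	cfi_demo
	.type	cfi_demo,@function
	.align	16
cfi_demo:
.cfi_startproc			# open this function's FDE
	push	%rbx		# spill a callee-saved register
.cfi_adjust_cfa_offset	8	# the push moved %rsp, so the CFA offset grows by 8
.cfi_offset	%rbx,-16	# %rbx now lives at CFA-16 (what .cfi_push records)
	mov	%rdi,%rbx	# body: trivially return the first argument
	mov	%rbx,%rax
	pop	%rbx		# reload the callee-saved register
.cfi_adjust_cfa_offset	-8	# CFA offset back to its entry value
.cfi_restore	%rbx		# %rbx is a register again, not a stack slot
	ret
.cfi_endproc			# close the FDE
	.size	cfi_demo,.-cfi_demo

The .cfi_cfa_expression lines in the patch cover the frames whose layout is data-dependent and so has no fixed CFA offset: the %rsp+40,deref,+8 form, for example, tells the unwinder to load the saved original stack pointer from 40(%rsp) and add 8 to recover the CFA, which is why each routine stores its entry %rsp there before carving out its variable-size temporary area.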