From: David Benjamin Date: Tue, 29 Jan 2019 05:12:15 +0000 (+0000) Subject: Fix some CFI issues in x86_64 assembly X-Git-Tag: openssl-3.0.0-alpha1~2519 X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=commitdiff_plain;h=c0e8e5007ba5234d4d448e82a1567e0c4467e629 Fix some CFI issues in x86_64 assembly The add/double shortcut in ecp_nistz256-x86_64.pl left one instruction point that did not unwind, and the "slow" path in AES_cbc_encrypt was not annotated correctly. For the latter, add .cfi_{remember,restore}_state support to perlasm. Next, fill in a bunch of functions that are missing no-op .cfi_startproc and .cfi_endproc blocks. libunwind cannot unwind those stack frames otherwise. Finally, work around a bug in libunwind by not encoding rflags. (rflags isn't a callee-saved register, so there's not much need to annotate it anyway.) These were found as part of ABI testing work in BoringSSL. Reviewed-by: Richard Levitte GH: #8109 --- diff --git a/crypto/aes/asm/aes-x86_64.pl b/crypto/aes/asm/aes-x86_64.pl index d84c697910..6e1dcef230 100755 --- a/crypto/aes/asm/aes-x86_64.pl +++ b/crypto/aes/asm/aes-x86_64.pl @@ -554,6 +554,7 @@ $code.=<<___; .type _x86_64_AES_encrypt_compact,\@abi-omnipotent .align 16 _x86_64_AES_encrypt_compact: +.cfi_startproc lea 128($sbox),$inp # size optimization mov 0-128($inp),$acc1 # prefetch Te4 mov 32-128($inp),$acc2 @@ -587,6 +588,7 @@ $code.=<<___; xor 8($key),$s2 xor 12($key),$s3 .byte 0xf3,0xc3 # rep ret +.cfi_endproc .size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact ___ @@ -1161,6 +1163,7 @@ $code.=<<___; .type _x86_64_AES_decrypt_compact,\@abi-omnipotent .align 16 _x86_64_AES_decrypt_compact: +.cfi_startproc lea 128($sbox),$inp # size optimization mov 0-128($inp),$acc1 # prefetch Td4 mov 32-128($inp),$acc2 @@ -1203,6 +1206,7 @@ $code.=<<___; xor 8($key),$s2 xor 12($key),$s3 .byte 0xf3,0xc3 # rep ret +.cfi_endproc .size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact ___ @@ -1365,6 +1369,7 @@ AES_set_encrypt_key: .type _x86_64_AES_set_encrypt_key,\@abi-omnipotent .align 16 _x86_64_AES_set_encrypt_key: +.cfi_startproc mov %esi,%ecx # %ecx=bits mov %rdi,%rsi # %rsi=userKey mov %rdx,%rdi # %rdi=key @@ -1546,6 +1551,7 @@ $code.=<<___; mov \$-1,%rax .Lexit: .byte 0xf3,0xc3 # rep ret +.cfi_endproc .size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key ___ @@ -1728,7 +1734,9 @@ AES_cbc_encrypt: cmp \$0,%rdx # check length je .Lcbc_epilogue pushfq -.cfi_push 49 # %rflags +# This could be .cfi_push 49, but libunwind fails on registers it does not +# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087. +.cfi_adjust_cfa_offset 8 push %rbx .cfi_push %rbx push %rbp @@ -1751,6 +1759,7 @@ AES_cbc_encrypt: cmp \$0,%r9 cmoveq %r10,$sbox +.cfi_remember_state mov OPENSSL_ia32cap_P(%rip),%r10d cmp \$$speed_limit,%rdx jb .Lcbc_slow_prologue @@ -1986,6 +1995,7 @@ AES_cbc_encrypt: #--------------------------- SLOW ROUTINE ---------------------------# .align 16 .Lcbc_slow_prologue: +.cfi_restore_state # allocate aligned stack frame... lea -88(%rsp),%rbp and \$-64,%rbp @@ -1997,8 +2007,10 @@ AES_cbc_encrypt: sub %r10,%rbp xchg %rsp,%rbp +.cfi_def_cfa_register %rbp #add \$8,%rsp # reserve for return address! mov %rbp,$_rsp # save %rsp +.cfi_cfa_expression $_rsp,deref,+64 .Lcbc_slow_body: #mov %rdi,$_inp # save copy of inp #mov %rsi,$_out # save copy of out @@ -2187,7 +2199,9 @@ AES_cbc_encrypt: .cfi_def_cfa %rsp,16 .Lcbc_popfq: popfq -.cfi_pop 49 # %rflags +# This could be .cfi_pop 49, but libunwind fails on registers it does not +# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087. +.cfi_adjust_cfa_offset -8 .Lcbc_epilogue: ret .cfi_endproc diff --git a/crypto/aes/asm/aesni-x86_64.pl b/crypto/aes/asm/aesni-x86_64.pl index 508533981d..406377a66f 100644 --- a/crypto/aes/asm/aesni-x86_64.pl +++ b/crypto/aes/asm/aesni-x86_64.pl @@ -274,6 +274,7 @@ $code.=<<___; .type ${PREFIX}_encrypt,\@abi-omnipotent .align 16 ${PREFIX}_encrypt: +.cfi_startproc movups ($inp),$inout0 # load input mov 240($key),$rounds # key->rounds ___ @@ -284,12 +285,14 @@ $code.=<<___; movups $inout0,($out) # output pxor $inout0,$inout0 ret +.cfi_endproc .size ${PREFIX}_encrypt,.-${PREFIX}_encrypt .globl ${PREFIX}_decrypt .type ${PREFIX}_decrypt,\@abi-omnipotent .align 16 ${PREFIX}_decrypt: +.cfi_startproc movups ($inp),$inout0 # load input mov 240($key),$rounds # key->rounds ___ @@ -300,6 +303,7 @@ $code.=<<___; movups $inout0,($out) # output pxor $inout0,$inout0 ret +.cfi_endproc .size ${PREFIX}_decrypt, .-${PREFIX}_decrypt ___ } @@ -325,6 +329,7 @@ $code.=<<___; .type _aesni_${dir}rypt2,\@abi-omnipotent .align 16 _aesni_${dir}rypt2: +.cfi_startproc $movkey ($key),$rndkey0 shl \$4,$rounds $movkey 16($key),$rndkey1 @@ -350,6 +355,7 @@ _aesni_${dir}rypt2: aes${dir}last $rndkey0,$inout0 aes${dir}last $rndkey0,$inout1 ret +.cfi_endproc .size _aesni_${dir}rypt2,.-_aesni_${dir}rypt2 ___ } @@ -361,6 +367,7 @@ $code.=<<___; .type _aesni_${dir}rypt3,\@abi-omnipotent .align 16 _aesni_${dir}rypt3: +.cfi_startproc $movkey ($key),$rndkey0 shl \$4,$rounds $movkey 16($key),$rndkey1 @@ -391,6 +398,7 @@ _aesni_${dir}rypt3: aes${dir}last $rndkey0,$inout1 aes${dir}last $rndkey0,$inout2 ret +.cfi_endproc .size _aesni_${dir}rypt3,.-_aesni_${dir}rypt3 ___ } @@ -406,6 +414,7 @@ $code.=<<___; .type _aesni_${dir}rypt4,\@abi-omnipotent .align 16 _aesni_${dir}rypt4: +.cfi_startproc $movkey ($key),$rndkey0 shl \$4,$rounds $movkey 16($key),$rndkey1 @@ -442,6 +451,7 @@ _aesni_${dir}rypt4: aes${dir}last $rndkey0,$inout2 aes${dir}last $rndkey0,$inout3 ret +.cfi_endproc .size _aesni_${dir}rypt4,.-_aesni_${dir}rypt4 ___ } @@ -453,6 +463,7 @@ $code.=<<___; .type _aesni_${dir}rypt6,\@abi-omnipotent .align 16 _aesni_${dir}rypt6: +.cfi_startproc $movkey ($key),$rndkey0 shl \$4,$rounds $movkey 16($key),$rndkey1 @@ -503,6 +514,7 @@ _aesni_${dir}rypt6: aes${dir}last $rndkey0,$inout4 aes${dir}last $rndkey0,$inout5 ret +.cfi_endproc .size _aesni_${dir}rypt6,.-_aesni_${dir}rypt6 ___ } @@ -514,6 +526,7 @@ $code.=<<___; .type _aesni_${dir}rypt8,\@abi-omnipotent .align 16 _aesni_${dir}rypt8: +.cfi_startproc $movkey ($key),$rndkey0 shl \$4,$rounds $movkey 16($key),$rndkey1 @@ -574,6 +587,7 @@ _aesni_${dir}rypt8: aes${dir}last $rndkey0,$inout6 aes${dir}last $rndkey0,$inout7 ret +.cfi_endproc .size _aesni_${dir}rypt8,.-_aesni_${dir}rypt8 ___ } @@ -598,6 +612,7 @@ $code.=<<___; .type aesni_ecb_encrypt,\@function,5 .align 16 aesni_ecb_encrypt: +.cfi_startproc ___ $code.=<<___ if ($win64); lea -0x58(%rsp),%rsp @@ -943,6 +958,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .size aesni_ecb_encrypt,.-aesni_ecb_encrypt ___ diff --git a/crypto/aes/asm/bsaes-x86_64.pl b/crypto/aes/asm/bsaes-x86_64.pl index 5a82ff6360..44651d437a 100644 --- a/crypto/aes/asm/bsaes-x86_64.pl +++ b/crypto/aes/asm/bsaes-x86_64.pl @@ -816,6 +816,7 @@ $code.=<<___; .type _bsaes_encrypt8,\@abi-omnipotent .align 64 _bsaes_encrypt8: +.cfi_startproc lea .LBS0(%rip), $const # constants table movdqa ($key), @XMM[9] # round 0 key @@ -875,11 +876,13 @@ $code.=<<___; pxor @XMM[8], @XMM[0] pxor @XMM[8], @XMM[1] ret +.cfi_endproc .size _bsaes_encrypt8,.-_bsaes_encrypt8 .type _bsaes_decrypt8,\@abi-omnipotent .align 64 _bsaes_decrypt8: +.cfi_startproc lea .LBS0(%rip), $const # constants table movdqa ($key), @XMM[9] # round 0 key @@ -937,6 +940,7 @@ $code.=<<___; pxor @XMM[8], @XMM[0] pxor @XMM[8], @XMM[1] ret +.cfi_endproc .size _bsaes_decrypt8,.-_bsaes_decrypt8 ___ } @@ -971,6 +975,7 @@ $code.=<<___; .type _bsaes_key_convert,\@abi-omnipotent .align 16 _bsaes_key_convert: +.cfi_startproc lea .Lmasks(%rip), $const movdqu ($inp), %xmm7 # load round 0 key lea 0x10($inp), $inp @@ -1049,6 +1054,7 @@ _bsaes_key_convert: movdqa 0x50($const), %xmm7 # .L63 #movdqa %xmm6, ($out) # don't save last round key ret +.cfi_endproc .size _bsaes_key_convert,.-_bsaes_key_convert ___ } diff --git a/crypto/aes/asm/vpaes-x86_64.pl b/crypto/aes/asm/vpaes-x86_64.pl index 1a95ac2088..d304003173 100644 --- a/crypto/aes/asm/vpaes-x86_64.pl +++ b/crypto/aes/asm/vpaes-x86_64.pl @@ -91,6 +91,7 @@ $code.=<<___; .type _vpaes_encrypt_core,\@abi-omnipotent .align 16 _vpaes_encrypt_core: +.cfi_startproc mov %rdx, %r9 mov \$16, %r11 mov 240(%rdx),%eax @@ -171,6 +172,7 @@ _vpaes_encrypt_core: pxor %xmm4, %xmm0 # 0 = A pshufb %xmm1, %xmm0 ret +.cfi_endproc .size _vpaes_encrypt_core,.-_vpaes_encrypt_core ## @@ -181,6 +183,7 @@ _vpaes_encrypt_core: .type _vpaes_decrypt_core,\@abi-omnipotent .align 16 _vpaes_decrypt_core: +.cfi_startproc mov %rdx, %r9 # load key mov 240(%rdx),%eax movdqa %xmm9, %xmm1 @@ -277,6 +280,7 @@ _vpaes_decrypt_core: pxor %xmm4, %xmm0 # 0 = A pshufb %xmm2, %xmm0 ret +.cfi_endproc .size _vpaes_decrypt_core,.-_vpaes_decrypt_core ######################################################## @@ -287,6 +291,7 @@ _vpaes_decrypt_core: .type _vpaes_schedule_core,\@abi-omnipotent .align 16 _vpaes_schedule_core: +.cfi_startproc # rdi = key # rsi = size in bits # rdx = buffer @@ -453,6 +458,7 @@ _vpaes_schedule_core: pxor %xmm6, %xmm6 pxor %xmm7, %xmm7 ret +.cfi_endproc .size _vpaes_schedule_core,.-_vpaes_schedule_core ## @@ -472,6 +478,7 @@ _vpaes_schedule_core: .type _vpaes_schedule_192_smear,\@abi-omnipotent .align 16 _vpaes_schedule_192_smear: +.cfi_startproc pshufd \$0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0 pshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a pxor %xmm1, %xmm6 # -> c+d c 0 0 @@ -480,6 +487,7 @@ _vpaes_schedule_192_smear: movdqa %xmm6, %xmm0 movhlps %xmm1, %xmm6 # clobber low side with zeros ret +.cfi_endproc .size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear ## @@ -503,6 +511,7 @@ _vpaes_schedule_192_smear: .type _vpaes_schedule_round,\@abi-omnipotent .align 16 _vpaes_schedule_round: +.cfi_startproc # extract rcon from xmm8 pxor %xmm1, %xmm1 palignr \$15, %xmm8, %xmm1 @@ -556,6 +565,7 @@ _vpaes_schedule_low_round: pxor %xmm7, %xmm0 movdqa %xmm0, %xmm7 ret +.cfi_endproc .size _vpaes_schedule_round,.-_vpaes_schedule_round ## @@ -570,6 +580,7 @@ _vpaes_schedule_low_round: .type _vpaes_schedule_transform,\@abi-omnipotent .align 16 _vpaes_schedule_transform: +.cfi_startproc movdqa %xmm9, %xmm1 pandn %xmm0, %xmm1 psrld \$4, %xmm1 @@ -580,6 +591,7 @@ _vpaes_schedule_transform: pshufb %xmm1, %xmm0 pxor %xmm2, %xmm0 ret +.cfi_endproc .size _vpaes_schedule_transform,.-_vpaes_schedule_transform ## @@ -608,6 +620,7 @@ _vpaes_schedule_transform: .type _vpaes_schedule_mangle,\@abi-omnipotent .align 16 _vpaes_schedule_mangle: +.cfi_startproc movdqa %xmm0, %xmm4 # save xmm0 for later movdqa .Lk_mc_forward(%rip),%xmm5 test %rcx, %rcx @@ -672,6 +685,7 @@ _vpaes_schedule_mangle: and \$0x30, %r8 movdqu %xmm3, (%rdx) ret +.cfi_endproc .size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle # @@ -681,6 +695,7 @@ _vpaes_schedule_mangle: .type ${PREFIX}_set_encrypt_key,\@function,3 .align 16 ${PREFIX}_set_encrypt_key: +.cfi_startproc ___ $code.=<<___ if ($win64); lea -0xb8(%rsp),%rsp @@ -723,12 +738,14 @@ ___ $code.=<<___; xor %eax,%eax ret +.cfi_endproc .size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key .globl ${PREFIX}_set_decrypt_key .type ${PREFIX}_set_decrypt_key,\@function,3 .align 16 ${PREFIX}_set_decrypt_key: +.cfi_startproc ___ $code.=<<___ if ($win64); lea -0xb8(%rsp),%rsp @@ -776,12 +793,14 @@ ___ $code.=<<___; xor %eax,%eax ret +.cfi_endproc .size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key .globl ${PREFIX}_encrypt .type ${PREFIX}_encrypt,\@function,3 .align 16 ${PREFIX}_encrypt: +.cfi_startproc ___ $code.=<<___ if ($win64); lea -0xb8(%rsp),%rsp @@ -819,12 +838,14 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .size ${PREFIX}_encrypt,.-${PREFIX}_encrypt .globl ${PREFIX}_decrypt .type ${PREFIX}_decrypt,\@function,3 .align 16 ${PREFIX}_decrypt: +.cfi_startproc ___ $code.=<<___ if ($win64); lea -0xb8(%rsp),%rsp @@ -862,6 +883,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .size ${PREFIX}_decrypt,.-${PREFIX}_decrypt ___ { @@ -874,6 +896,7 @@ $code.=<<___; .type ${PREFIX}_cbc_encrypt,\@function,6 .align 16 ${PREFIX}_cbc_encrypt: +.cfi_startproc xchg $key,$len ___ ($len,$key)=($key,$len); @@ -944,6 +967,7 @@ ___ $code.=<<___; .Lcbc_abort: ret +.cfi_endproc .size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt ___ } @@ -957,6 +981,7 @@ $code.=<<___; .type _vpaes_preheat,\@abi-omnipotent .align 16 _vpaes_preheat: +.cfi_startproc lea .Lk_s0F(%rip), %r10 movdqa -0x20(%r10), %xmm10 # .Lk_inv movdqa -0x10(%r10), %xmm11 # .Lk_inv+16 @@ -966,6 +991,7 @@ _vpaes_preheat: movdqa 0x50(%r10), %xmm15 # .Lk_sb2 movdqa 0x60(%r10), %xmm14 # .Lk_sb2+16 ret +.cfi_endproc .size _vpaes_preheat,.-_vpaes_preheat ######################################################## ## ## diff --git a/crypto/bn/asm/rsaz-avx2.pl b/crypto/bn/asm/rsaz-avx2.pl index 9e6ec3a196..fddb12ffff 100755 --- a/crypto/bn/asm/rsaz-avx2.pl +++ b/crypto/bn/asm/rsaz-avx2.pl @@ -1492,6 +1492,7 @@ $code.=<<___; .type rsaz_1024_red2norm_avx2,\@abi-omnipotent .align 32 rsaz_1024_red2norm_avx2: +.cfi_startproc sub \$-128,$inp # size optimization xor %rax,%rax ___ @@ -1525,12 +1526,14 @@ ___ } $code.=<<___; ret +.cfi_endproc .size rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2 .globl rsaz_1024_norm2red_avx2 .type rsaz_1024_norm2red_avx2,\@abi-omnipotent .align 32 rsaz_1024_norm2red_avx2: +.cfi_startproc sub \$-128,$out # size optimization mov ($inp),@T[0] mov \$0x1fffffff,%eax @@ -1562,6 +1565,7 @@ $code.=<<___; mov @T[0],`8*($j+2)-128`($out) mov @T[0],`8*($j+3)-128`($out) ret +.cfi_endproc .size rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2 ___ } @@ -1573,6 +1577,7 @@ $code.=<<___; .type rsaz_1024_scatter5_avx2,\@abi-omnipotent .align 32 rsaz_1024_scatter5_avx2: +.cfi_startproc vzeroupper vmovdqu .Lscatter_permd(%rip),%ymm5 shl \$4,$power @@ -1592,6 +1597,7 @@ rsaz_1024_scatter5_avx2: vzeroupper ret +.cfi_endproc .size rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2 .globl rsaz_1024_gather5_avx2 diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl index 8fa233ca12..6331566c0c 100755 --- a/crypto/bn/asm/x86_64-mont5.pl +++ b/crypto/bn/asm/x86_64-mont5.pl @@ -2910,6 +2910,7 @@ bn_powerx5: .align 32 bn_sqrx8x_internal: __bn_sqrx8x_internal: +.cfi_startproc ################################################################## # Squaring part: # @@ -3542,6 +3543,7 @@ __bn_sqrx8x_reduction: cmp 8+8(%rsp),%r8 # end of t[]? jb .Lsqrx8x_reduction_loop ret +.cfi_endproc .size bn_sqrx8x_internal,.-bn_sqrx8x_internal ___ } diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/ec/asm/ecp_nistz256-x86_64.pl index 0579f45877..a28ee8e947 100755 --- a/crypto/ec/asm/ecp_nistz256-x86_64.pl +++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl @@ -1674,6 +1674,7 @@ $code.=<<___; .type __ecp_nistz256_mul_montq,\@abi-omnipotent .align 32 __ecp_nistz256_mul_montq: +.cfi_startproc ######################################################################## # Multiply a by b[0] mov %rax, $t1 @@ -1885,6 +1886,7 @@ __ecp_nistz256_mul_montq: mov $acc1, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq ################################################################################ @@ -1968,6 +1970,7 @@ $code.=<<___; .type __ecp_nistz256_sqr_montq,\@abi-omnipotent .align 32 __ecp_nistz256_sqr_montq: +.cfi_startproc mov %rax, $acc5 mulq $acc6 # a[1]*a[0] mov %rax, $acc1 @@ -2125,6 +2128,7 @@ __ecp_nistz256_sqr_montq: mov $acc7, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq ___ @@ -2133,6 +2137,7 @@ $code.=<<___; .type __ecp_nistz256_mul_montx,\@abi-omnipotent .align 32 __ecp_nistz256_mul_montx: +.cfi_startproc ######################################################################## # Multiply by b[0] mulx $acc1, $acc0, $acc1 @@ -2295,11 +2300,13 @@ __ecp_nistz256_mul_montx: mov $acc1, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx .type __ecp_nistz256_sqr_montx,\@abi-omnipotent .align 32 __ecp_nistz256_sqr_montx: +.cfi_startproc mulx $acc6, $acc1, $acc2 # a[0]*a[1] mulx $acc7, $t0, $acc3 # a[0]*a[2] xor %eax, %eax @@ -2423,6 +2430,7 @@ __ecp_nistz256_sqr_montx: mov $acc7, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx ___ } @@ -2578,6 +2586,7 @@ ecp_nistz256_scatter_w5: .type ecp_nistz256_gather_w5,\@abi-omnipotent .align 32 ecp_nistz256_gather_w5: +.cfi_startproc ___ $code.=<<___ if ($avx>1); mov OPENSSL_ia32cap_P+8(%rip), %eax @@ -2666,6 +2675,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .LSEH_end_ecp_nistz256_gather_w5: .size ecp_nistz256_gather_w5,.-ecp_nistz256_gather_w5 @@ -2694,6 +2704,7 @@ ecp_nistz256_scatter_w7: .type ecp_nistz256_gather_w7,\@abi-omnipotent .align 32 ecp_nistz256_gather_w7: +.cfi_startproc ___ $code.=<<___ if ($avx>1); mov OPENSSL_ia32cap_P+8(%rip), %eax @@ -2771,6 +2782,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .LSEH_end_ecp_nistz256_gather_w7: .size ecp_nistz256_gather_w7,.-ecp_nistz256_gather_w7 ___ @@ -2787,6 +2799,7 @@ $code.=<<___; .type ecp_nistz256_avx2_gather_w5,\@abi-omnipotent .align 32 ecp_nistz256_avx2_gather_w5: +.cfi_startproc .Lavx2_gather_w5: vzeroupper ___ @@ -2874,6 +2887,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .LSEH_end_ecp_nistz256_avx2_gather_w5: .size ecp_nistz256_avx2_gather_w5,.-ecp_nistz256_avx2_gather_w5 ___ @@ -2893,6 +2907,7 @@ $code.=<<___; .type ecp_nistz256_avx2_gather_w7,\@abi-omnipotent .align 32 ecp_nistz256_avx2_gather_w7: +.cfi_startproc .Lavx2_gather_w7: vzeroupper ___ @@ -2995,6 +3010,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .LSEH_end_ecp_nistz256_avx2_gather_w7: .size ecp_nistz256_avx2_gather_w7,.-ecp_nistz256_avx2_gather_w7 ___ @@ -3064,6 +3080,7 @@ $code.=<<___; .type __ecp_nistz256_add_toq,\@abi-omnipotent .align 32 __ecp_nistz256_add_toq: +.cfi_startproc xor $t4,$t4 add 8*0($b_ptr), $a0 adc 8*1($b_ptr), $a1 @@ -3091,11 +3108,13 @@ __ecp_nistz256_add_toq: mov $a3, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq .type __ecp_nistz256_sub_fromq,\@abi-omnipotent .align 32 __ecp_nistz256_sub_fromq: +.cfi_startproc sub 8*0($b_ptr), $a0 sbb 8*1($b_ptr), $a1 mov $a0, $t0 @@ -3122,11 +3141,13 @@ __ecp_nistz256_sub_fromq: mov $a3, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq .type __ecp_nistz256_subq,\@abi-omnipotent .align 32 __ecp_nistz256_subq: +.cfi_startproc sub $a0, $t0 sbb $a1, $t1 mov $t0, $a0 @@ -3149,11 +3170,13 @@ __ecp_nistz256_subq: cmovnz $t3, $a3 ret +.cfi_endproc .size __ecp_nistz256_subq,.-__ecp_nistz256_subq .type __ecp_nistz256_mul_by_2q,\@abi-omnipotent .align 32 __ecp_nistz256_mul_by_2q: +.cfi_startproc xor $t4, $t4 add $a0, $a0 # a0:a3+a0:a3 adc $a1, $a1 @@ -3181,6 +3204,7 @@ __ecp_nistz256_mul_by_2q: mov $a3, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q ___ } @@ -3620,7 +3644,9 @@ $code.=<<___; movq %xmm1, $a_ptr # restore $a_ptr movq %xmm0, $r_ptr # restore $r_ptr add \$`32*(18-5)`, %rsp # difference in frame sizes +.cfi_adjust_cfa_offset `-32*(18-5)` jmp .Lpoint_double_shortcut$x +.cfi_adjust_cfa_offset `32*(18-5)` .align 32 .Ladd_proceed$x: @@ -4156,6 +4182,7 @@ $code.=<<___; .type __ecp_nistz256_add_tox,\@abi-omnipotent .align 32 __ecp_nistz256_add_tox: +.cfi_startproc xor $t4, $t4 adc 8*0($b_ptr), $a0 adc 8*1($b_ptr), $a1 @@ -4184,11 +4211,13 @@ __ecp_nistz256_add_tox: mov $a3, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox .type __ecp_nistz256_sub_fromx,\@abi-omnipotent .align 32 __ecp_nistz256_sub_fromx: +.cfi_startproc xor $t4, $t4 sbb 8*0($b_ptr), $a0 sbb 8*1($b_ptr), $a1 @@ -4217,11 +4246,13 @@ __ecp_nistz256_sub_fromx: mov $a3, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx .type __ecp_nistz256_subx,\@abi-omnipotent .align 32 __ecp_nistz256_subx: +.cfi_startproc xor $t4, $t4 sbb $a0, $t0 sbb $a1, $t1 @@ -4246,11 +4277,13 @@ __ecp_nistz256_subx: cmovc $t3, $a3 ret +.cfi_endproc .size __ecp_nistz256_subx,.-__ecp_nistz256_subx .type __ecp_nistz256_mul_by_2x,\@abi-omnipotent .align 32 __ecp_nistz256_mul_by_2x: +.cfi_startproc xor $t4, $t4 adc $a0, $a0 # a0:a3+a0:a3 adc $a1, $a1 @@ -4279,6 +4312,7 @@ __ecp_nistz256_mul_by_2x: mov $a3, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x ___ } diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl index 8f8ab62197..d6d0d7527e 100644 --- a/crypto/modes/asm/ghash-x86_64.pl +++ b/crypto/modes/asm/ghash-x86_64.pl @@ -529,6 +529,7 @@ $code.=<<___; .type gcm_init_clmul,\@abi-omnipotent .align 16 gcm_init_clmul: +.cfi_startproc .L_init_clmul: ___ $code.=<<___ if ($win64); @@ -598,6 +599,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .size gcm_init_clmul,.-gcm_init_clmul ___ } @@ -609,6 +611,7 @@ $code.=<<___; .type gcm_gmult_clmul,\@abi-omnipotent .align 16 gcm_gmult_clmul: +.cfi_startproc .L_gmult_clmul: movdqu ($Xip),$Xi movdqa .Lbswap_mask(%rip),$T3 @@ -645,6 +648,7 @@ $code.=<<___; pshufb $T3,$Xi movdqu $Xi,($Xip) ret +.cfi_endproc .size gcm_gmult_clmul,.-gcm_gmult_clmul ___ } @@ -658,6 +662,7 @@ $code.=<<___; .type gcm_ghash_clmul,\@abi-omnipotent .align 32 gcm_ghash_clmul: +.cfi_startproc .L_ghash_clmul: ___ $code.=<<___ if ($win64); @@ -1005,6 +1010,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .size gcm_ghash_clmul,.-gcm_ghash_clmul ___ } @@ -1014,6 +1020,7 @@ $code.=<<___; .type gcm_init_avx,\@abi-omnipotent .align 32 gcm_init_avx: +.cfi_startproc ___ if ($avx) { my ($Htbl,$Xip)=@_4args; @@ -1142,6 +1149,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .size gcm_init_avx,.-gcm_init_avx ___ } else { @@ -1156,7 +1164,9 @@ $code.=<<___; .type gcm_gmult_avx,\@abi-omnipotent .align 32 gcm_gmult_avx: +.cfi_startproc jmp .L_gmult_clmul +.cfi_endproc .size gcm_gmult_avx,.-gcm_gmult_avx ___ @@ -1165,6 +1175,7 @@ $code.=<<___; .type gcm_ghash_avx,\@abi-omnipotent .align 32 gcm_ghash_avx: +.cfi_startproc ___ if ($avx) { my ($Xip,$Htbl,$inp,$len)=@_4args; @@ -1577,6 +1588,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .size gcm_ghash_avx,.-gcm_ghash_avx ___ } else { diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl index 5e5d6cc0b1..080fc8d881 100755 --- a/crypto/perlasm/x86_64-xlate.pl +++ b/crypto/perlasm/x86_64-xlate.pl @@ -541,6 +541,7 @@ my %globals; ); my ($cfa_reg, $cfa_rsp); + my @cfa_stack; # [us]leb128 format is variable-length integer representation base # 2^128, with most significant bit of each byte being 0 denoting @@ -688,6 +689,14 @@ my %globals; cfa_expression($$line))); last; }; + /remember_state/ + && do { push @cfa_stack, [$cfa_reg, $cfa_rsp]; + last; + }; + /restore_state/ + && do { ($cfa_reg, $cfa_rsp) = @{pop @cfa_stack}; + last; + }; } $self->{value} = ".cfi_$dir\t$$line" if ($dir);