ec/asm/x25519-x86_64.pl: add CFI directives and Windows SE handler.
author Andy Polyakov <appro@openssl.org>
Wed, 11 Jul 2018 20:36:49 +0000 (22:36 +0200)
committer Andy Polyakov <appro@openssl.org>
Sun, 15 Jul 2018 17:05:57 +0000 (19:05 +0200)
Reviewed-by: Rich Salz <rsalz@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6699)

crypto/ec/asm/x25519-x86_64.pl

index 0936d5bc6da83f9b7ebb9abcc11977b467ee311c..da81e065d4d217b7c4f1aa8892b54b827224302d 100755 (executable)
@@ -102,13 +102,22 @@ $code.=<<___;
 .type  x25519_fe51_mul,\@function,3
 .align 32
 x25519_fe51_mul:
+.cfi_startproc
        push    %rbp
+.cfi_push      %rbp
        push    %rbx
+.cfi_push      %rbx
        push    %r12
+.cfi_push      %r12
        push    %r13
+.cfi_push      %r13
        push    %r14
+.cfi_push      %r14
        push    %r15
+.cfi_push      %r15
        lea     -8*5(%rsp),%rsp
+.cfi_adjust_cfa_offset 40
+.Lfe51_mul_body:
 
        mov     8*0(%rsi),%rax          # f[0]
        mov     8*0(%rdx),%r11          # load g[0-4]
@@ -236,19 +245,30 @@ x25519_fe51_mul:
 
        mov     8*4(%rsp),%rdi          # restore 1st argument
        jmp     .Lreduce51
+.Lfe51_mul_epilogue:
+.cfi_endproc
 .size  x25519_fe51_mul,.-x25519_fe51_mul
 
 .globl x25519_fe51_sqr
 .type  x25519_fe51_sqr,\@function,2
 .align 32
 x25519_fe51_sqr:
+.cfi_startproc
        push    %rbp
+.cfi_push      %rbp
        push    %rbx
+.cfi_push      %rbx
        push    %r12
+.cfi_push      %r12
        push    %r13
+.cfi_push      %r13
        push    %r14
+.cfi_push      %r14
        push    %r15
+.cfi_push      %r15
        lea     -8*5(%rsp),%rsp
+.cfi_adjust_cfa_offset 40
+.Lfe51_sqr_body:
 
        mov     8*0(%rsi),%rax          # g[0]
        mov     8*2(%rsi),%r15          # g[2]
@@ -391,27 +411,45 @@ x25519_fe51_sqr:
        mov     %r10,8*4(%rdi)
 
        mov     8*5(%rsp),%r15
+.cfi_restore   %r15
        mov     8*6(%rsp),%r14
+.cfi_restore   %r14
        mov     8*7(%rsp),%r13
+.cfi_restore   %r13
        mov     8*8(%rsp),%r12
+.cfi_restore   %r12
        mov     8*9(%rsp),%rbx
+.cfi_restore   %rbx
        mov     8*10(%rsp),%rbp
+.cfi_restore   %rbp
        lea     8*11(%rsp),%rsp
+.cfi_adjust_cfa_offset 88
+.Lfe51_sqr_epilogue:
        ret
+.cfi_endproc
 .size  x25519_fe51_sqr,.-x25519_fe51_sqr
 
 .globl x25519_fe51_mul121666
 .type  x25519_fe51_mul121666,\@function,2
 .align 32
 x25519_fe51_mul121666:
+.cfi_startproc
        push    %rbp
+.cfi_push      %rbp
        push    %rbx
+.cfi_push      %rbx
        push    %r12
+.cfi_push      %r12
        push    %r13
+.cfi_push      %r13
        push    %r14
+.cfi_push      %r14
        push    %r15
-       mov     \$121666,%eax
+.cfi_push      %r15
        lea     -8*5(%rsp),%rsp
+.cfi_adjust_cfa_offset 40
+.Lfe51_mul121666_body:
+       mov     \$121666,%eax
 
        mulq    8*0(%rsi)
        mov     %rax,%rbx               # %rbx:%rcx = h0
@@ -434,6 +472,8 @@ x25519_fe51_mul121666:
        mov     %rdx,%r15
 
        jmp     .Lreduce51
+.Lfe51_mul121666_epilogue:
+.cfi_endproc
 .size  x25519_fe51_mul121666,.-x25519_fe51_mul121666
 ___
 ########################################################################
@@ -460,14 +500,24 @@ x25519_fe64_eligible:
 .type  x25519_fe64_mul,\@function,3
 .align 32
 x25519_fe64_mul:
+.cfi_startproc
        push    %rbp
+.cfi_push      %rbp
        push    %rbx
+.cfi_push      %rbx
        push    %r12
+.cfi_push      %r12
        push    %r13
+.cfi_push      %r13
        push    %r14
+.cfi_push      %r14
        push    %r15
+.cfi_push      %r15
        push    %rdi                    # offload dst
+.cfi_push      %rdi
        lea     -8*2(%rsp),%rsp
+.cfi_adjust_cfa_offset 16
+.Lfe64_mul_body:
 
        mov     %rdx,%rax
        mov     8*0(%rdx),%rbp          # b[0]
@@ -534,20 +584,32 @@ x25519_fe64_mul:
        adox    %rdi,$acc7              # of=0
 
        jmp     .Lreduce64
+.Lfe64_mul_epilogue:
+.cfi_endproc
 .size  x25519_fe64_mul,.-x25519_fe64_mul
 
 .globl x25519_fe64_sqr
 .type  x25519_fe64_sqr,\@function,2
 .align 32
 x25519_fe64_sqr:
+.cfi_startproc
        push    %rbp
+.cfi_push      %rbp
        push    %rbx
+.cfi_push      %rbx
        push    %r12
+.cfi_push      %r12
        push    %r13
+.cfi_push      %r13
        push    %r14
+.cfi_push      %r14
        push    %r15
+.cfi_push      %r15
        push    %rdi                    # offload dst
+.cfi_push      %rdi
        lea     -8*2(%rsp),%rsp
+.cfi_adjust_cfa_offset 16
+.Lfe64_sqr_body:
 
        mov     8*0(%rsi),%rdx          # a[0]
        mov     8*1(%rsi),%rcx          # a[1]
@@ -637,19 +699,29 @@ x25519_fe64_sqr:
        mov     $acc0,8*0(%rdi)
 
        mov     8*3(%rsp),%r15
+.cfi_restore   %r15
        mov     8*4(%rsp),%r14
+.cfi_restore   %r14
        mov     8*5(%rsp),%r13
+.cfi_restore   %r13
        mov     8*6(%rsp),%r12
+.cfi_restore   %r12
        mov     8*7(%rsp),%rbx
+.cfi_restore   %rbx
        mov     8*8(%rsp),%rbp
+.cfi_restore   %rbp
        lea     8*9(%rsp),%rsp
+.cfi_adjust_cfa_offset 88
+.Lfe64_sqr_epilogue:
        ret
+.cfi_endproc
 .size  x25519_fe64_sqr,.-x25519_fe64_sqr
 
 .globl x25519_fe64_mul121666
 .type  x25519_fe64_mul121666,\@function,2
 .align 32
 x25519_fe64_mul121666:
+.Lfe64_mul121666_body:
        mov     \$121666,%edx
        mulx    8*0(%rsi),$acc0,%rcx
        mulx    8*1(%rsi),$acc1,%rax
@@ -676,6 +748,7 @@ x25519_fe64_mul121666:
        mov     $acc3,8*3(%rdi)
        mov     $acc0,8*0(%rdi)
 
+.Lfe64_mul121666_epilogue:
        ret
 .size  x25519_fe64_mul121666,.-x25519_fe64_mul121666
 
@@ -683,6 +756,7 @@ x25519_fe64_mul121666:
 .type  x25519_fe64_add,\@function,3
 .align 32
 x25519_fe64_add:
+.Lfe64_add_body:
        mov     8*0(%rsi),$acc0
        mov     8*1(%rsi),$acc1
        mov     8*2(%rsi),$acc2
@@ -709,6 +783,7 @@ x25519_fe64_add:
        add     %rax,$acc0
        mov     $acc0,8*0(%rdi)
 
+.Lfe64_add_epilogue:
        ret
 .size  x25519_fe64_add,.-x25519_fe64_add
 
@@ -716,6 +791,7 @@ x25519_fe64_add:
 .type  x25519_fe64_sub,\@function,3
 .align 32
 x25519_fe64_sub:
+.Lfe64_sub_body:
        mov     8*0(%rsi),$acc0
        mov     8*1(%rsi),$acc1
        mov     8*2(%rsi),$acc2
@@ -742,6 +818,7 @@ x25519_fe64_sub:
        sub     %rax,$acc0
        mov     $acc0,8*0(%rdi)
 
+.Lfe64_sub_epilogue:
        ret
 .size  x25519_fe64_sub,.-x25519_fe64_sub
 
@@ -749,6 +826,7 @@ x25519_fe64_sub:
 .type  x25519_fe64_tobytes,\@function,2
 .align 32
 x25519_fe64_tobytes:
+.Lfe64_to_body:
        mov     8*0(%rsi),$acc0
        mov     8*1(%rsi),$acc1
        mov     8*2(%rsi),$acc2
@@ -782,6 +860,7 @@ x25519_fe64_tobytes:
        mov     $acc2,8*2(%rdi)
        mov     $acc3,8*3(%rdi)
 
+.Lfe64_to_epilogue:
        ret
 .size  x25519_fe64_tobytes,.-x25519_fe64_tobytes
 ___
@@ -817,6 +896,222 @@ $code.=<<___;
 .asciz "X25519 primitives for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
 ___
 
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+#              CONTEXT *context,DISPATCHER_CONTEXT *disp)
+if ($win64) {
+$rec="%rcx";
+$frame="%rdx";
+$context="%r8";
+$disp="%r9";
+
+$code.=<<___;
+.extern        __imp_RtlVirtualUnwind
+
+.type  short_handler,\@abi-omnipotent
+.align 16
+short_handler:                         # SEH handler for functions that push nothing (the short_handler entries in .xdata)
+       push    %rsi                    # save the Win64 non-volatile registers before clobbering them
+       push    %rdi
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       pushfq
+       sub     \$64,%rsp               # 32..56(%rsp) carry args 5-8 of the RtlVirtualUnwind call in the shared tail
+
+       mov     120($context),%rax      # pull context->Rax
+       mov     248($context),%rbx      # pull context->Rip
+
+       mov     8($disp),%rsi           # disp->ImageBase
+       mov     56($disp),%r11          # disp->HandlerData
+
+       mov     0(%r11),%r10d           # HandlerData[0]
+       lea     (%rsi,%r10),%r10        # end of prologue label
+       cmp     %r10,%rbx               # context->Rip<end of prologue label
+       jb      .Lcommon_seh_tail       # still in prologue: frame base is context->Rax (presumably the generated Win64 prologue keeps entry rsp in rax - confirm in the translator)
+
+       mov     152($context),%rax      # pull context->Rsp
+       jmp     .Lcommon_seh_tail       # shared tail is located inside full_handler
+.size  short_handler,.-short_handler
+
+.type  full_handler,\@abi-omnipotent
+.align 16
+full_handler:                          # SEH handler for functions that push rbp,rbx,r12-r15 and reserve HandlerData[2] bytes in total
+       push    %rsi                    # save the Win64 non-volatile registers before clobbering them
+       push    %rdi
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       pushfq
+       sub     \$64,%rsp               # 32..56(%rsp) carry args 5-8 of the RtlVirtualUnwind call below
+
+       mov     120($context),%rax      # pull context->Rax
+       mov     248($context),%rbx      # pull context->Rip
+
+       mov     8($disp),%rsi           # disp->ImageBase
+       mov     56($disp),%r11          # disp->HandlerData
+
+       mov     0(%r11),%r10d           # HandlerData[0]
+       lea     (%rsi,%r10),%r10        # end of prologue label
+       cmp     %r10,%rbx               # context->Rip<end of prologue label
+       jb      .Lcommon_seh_tail       # still in prologue: frame base is context->Rax (presumably the generated Win64 prologue keeps entry rsp in rax - confirm in the translator)
+
+       mov     152($context),%rax      # pull context->Rsp
+
+       mov     4(%r11),%r10d           # HandlerData[1]
+       lea     (%rsi,%r10),%r10        # epilogue label
+       cmp     %r10,%rbx               # context->Rip>=epilogue label
+       jae     .Lcommon_seh_tail       # frame already released: context->Rsp is the frame base as is
+
+       mov     8(%r11),%r10d           # HandlerData[2]
+       lea     (%rax,%r10),%rax        # step over the whole frame: rax = rsp as it was before the first push
+
+       mov     -8(%rax),%rbp           # reload saved registers in the order the prologues push them
+       mov     -16(%rax),%rbx
+       mov     -24(%rax),%r12
+       mov     -32(%rax),%r13
+       mov     -40(%rax),%r14
+       mov     -48(%rax),%r15
+       mov     %rbx,144($context)      # restore context->Rbx
+       mov     %rbp,160($context)      # restore context->Rbp
+       mov     %r12,216($context)      # restore context->R12
+       mov     %r13,224($context)      # restore context->R13
+       mov     %r14,232($context)      # restore context->R14
+       mov     %r15,240($context)      # restore context->R15
+
+.Lcommon_seh_tail:                     # entered from both handlers with rax = frame base
+       mov     8(%rax),%rdi            # rdi/rsi are read back from 8/16 above the frame base (presumably stored there by the generated Win64 prologue - confirm in the translator)
+       mov     16(%rax),%rsi
+       mov     %rax,152($context)      # restore context->Rsp
+       mov     %rsi,168($context)      # restore context->Rsi
+       mov     %rdi,176($context)      # restore context->Rdi
+
+       mov     40($disp),%rdi          # disp->ContextRecord
+       mov     $context,%rsi           # context
+       mov     \$154,%ecx              # sizeof(CONTEXT) in qwords
+       .long   0xa548f3fc              # cld; rep movsq
+
+       mov     $disp,%rsi              # keep disp in rsi, r9 is reloaded as arg4 below
+       xor     %rcx,%rcx               # arg1, UNW_FLAG_NHANDLER
+       mov     8(%rsi),%rdx            # arg2, disp->ImageBase
+       mov     0(%rsi),%r8             # arg3, disp->ControlPc
+       mov     16(%rsi),%r9            # arg4, disp->FunctionEntry
+       mov     40(%rsi),%r10           # disp->ContextRecord
+       lea     56(%rsi),%r11           # &disp->HandlerData
+       lea     24(%rsi),%r12           # &disp->EstablisherFrame
+       mov     %r10,32(%rsp)           # arg5
+       mov     %r11,40(%rsp)           # arg6
+       mov     %r12,48(%rsp)           # arg7
+       mov     %rcx,56(%rsp)           # arg8, (NULL)
+       call    *__imp_RtlVirtualUnwind(%rip)
+
+       mov     \$1,%eax                # ExceptionContinueSearch
+       add     \$64,%rsp               # drop the argument area
+       popfq
+       pop     %r15                    # restore in reverse order of the pushes at entry
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       pop     %rdi
+       pop     %rsi
+       ret
+.size  full_handler,.-full_handler
+
+.section       .pdata                  # function table: one (begin RVA, end RVA, unwind info RVA) triple per function
+.align 4
+       .rva    .LSEH_begin_x25519_fe51_mul             # .LSEH_begin_/.LSEH_end_ labels are not defined in this file; presumably emitted by the perlasm translator - confirm
+       .rva    .LSEH_end_x25519_fe51_mul
+       .rva    .LSEH_info_x25519_fe51_mul              # matching record in .xdata
+
+       .rva    .LSEH_begin_x25519_fe51_sqr
+       .rva    .LSEH_end_x25519_fe51_sqr
+       .rva    .LSEH_info_x25519_fe51_sqr
+
+       .rva    .LSEH_begin_x25519_fe51_mul121666
+       .rva    .LSEH_end_x25519_fe51_mul121666
+       .rva    .LSEH_info_x25519_fe51_mul121666
+___
+$code.=<<___   if ($addx);
+       .rva    .LSEH_begin_x25519_fe64_mul             # fe64 entries are emitted only when the addx code path is enabled
+       .rva    .LSEH_end_x25519_fe64_mul
+       .rva    .LSEH_info_x25519_fe64_mul
+
+       .rva    .LSEH_begin_x25519_fe64_sqr
+       .rva    .LSEH_end_x25519_fe64_sqr
+       .rva    .LSEH_info_x25519_fe64_sqr
+
+       .rva    .LSEH_begin_x25519_fe64_mul121666
+       .rva    .LSEH_end_x25519_fe64_mul121666
+       .rva    .LSEH_info_x25519_fe64_mul121666
+
+       .rva    .LSEH_begin_x25519_fe64_add
+       .rva    .LSEH_end_x25519_fe64_add
+       .rva    .LSEH_info_x25519_fe64_add
+
+       .rva    .LSEH_begin_x25519_fe64_sub
+       .rva    .LSEH_end_x25519_fe64_sub
+       .rva    .LSEH_info_x25519_fe64_sub
+
+       .rva    .LSEH_begin_x25519_fe64_tobytes
+       .rva    .LSEH_end_x25519_fe64_tobytes
+       .rva    .LSEH_info_x25519_fe64_tobytes
+___
+$code.=<<___;
+.section       .xdata                  # unwind info records referenced from .pdata
+.align 8
+.LSEH_info_x25519_fe51_mul:
+       .byte   9,0,0,0                 # UNWIND_INFO header: version 1 with UNW_FLAG_EHANDLER, no prologue size, no unwind codes
+       .rva    full_handler            # language-specific handler
+       .rva    .Lfe51_mul_body,.Lfe51_mul_epilogue     # HandlerData[]
+       .long   88,0                    # HandlerData[2] = frame size (6 pushes + 5 scratch slots); the trailing 0 is not read by the handlers, presumably padding
+.LSEH_info_x25519_fe51_sqr:
+       .byte   9,0,0,0
+       .rva    full_handler
+       .rva    .Lfe51_sqr_body,.Lfe51_sqr_epilogue     # HandlerData[]
+       .long   88,0                    # same frame layout as fe51_mul
+.LSEH_info_x25519_fe51_mul121666:
+       .byte   9,0,0,0
+       .rva    full_handler
+       .rva    .Lfe51_mul121666_body,.Lfe51_mul121666_epilogue # HandlerData[]
+       .long   88,0                    # same frame layout as fe51_mul
+___
+$code.=<<___   if ($addx);
+.LSEH_info_x25519_fe64_mul:
+       .byte   9,0,0,0
+       .rva    full_handler
+       .rva    .Lfe64_mul_body,.Lfe64_mul_epilogue     # HandlerData[]
+       .long   72,0                    # HandlerData[2] = frame size (7 pushes + 2 scratch slots)
+.LSEH_info_x25519_fe64_sqr:
+       .byte   9,0,0,0
+       .rva    full_handler
+       .rva    .Lfe64_sqr_body,.Lfe64_sqr_epilogue     # HandlerData[]
+       .long   72,0                    # same frame layout as fe64_mul
+.LSEH_info_x25519_fe64_mul121666:
+       .byte   9,0,0,0
+       .rva    short_handler           # no pushes in this function, so only HandlerData[0] is consulted
+       .rva    .Lfe64_mul121666_body,.Lfe64_mul121666_epilogue # HandlerData[]
+.LSEH_info_x25519_fe64_add:
+       .byte   9,0,0,0
+       .rva    short_handler
+       .rva    .Lfe64_add_body,.Lfe64_add_epilogue     # HandlerData[]
+.LSEH_info_x25519_fe64_sub:
+       .byte   9,0,0,0
+       .rva    short_handler
+       .rva    .Lfe64_sub_body,.Lfe64_sub_epilogue     # HandlerData[]
+.LSEH_info_x25519_fe64_tobytes:
+       .byte   9,0,0,0
+       .rva    short_handler
+       .rva    .Lfe64_to_body,.Lfe64_to_epilogue       # HandlerData[]
+___
+}
+
 $code =~ s/\`([^\`]*)\`/eval $1/gem;
 print $code;
 close STDOUT;