From e7d1363d12ce66e2ad7b045a0a9794b17e2e8ab9 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sun, 14 Aug 2011 09:06:06 +0000 Subject: [PATCH] x86_64-mont5.pl: add missing Win64 support. --- crypto/bn/asm/x86_64-mont5.pl | 176 ++++++++++++++++++++++++++++++++-- 1 file changed, 170 insertions(+), 6 deletions(-) diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl index b7b1ef6161..fe335dfd44 100755 --- a/crypto/bn/asm/x86_64-mont5.pl +++ b/crypto/bn/asm/x86_64-mont5.pl @@ -64,6 +64,7 @@ bn_mul_mont_gather5: .align 16 .Lmul_enter: + mov ${num}d,${num}d mov `($win64?56:8)`(%rsp),%r10d # load 7th argument push %rbx push %rbp @@ -71,10 +72,16 @@ bn_mul_mont_gather5: push %r13 push %r14 push %r15 - - mov ${num}d,${num}d - lea 2($num),%r11 +___ +$code.=<<___ if ($win64); + lea -0x28(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) +.Lmul_alloca: +___ +$code.=<<___; mov %rsp,%rax + lea 2($num),%r11 neg %r11 lea (%rsp,%r11,8),%rsp # tp=alloca(8*(num+2)) and \$-1024,%rsp # minimize TLB usage @@ -313,6 +320,13 @@ $code.=<<___; mov 8(%rsp,$num,8),%rsi # restore %rsp mov \$1,%rax +___ +$code.=<<___ if ($win64); + movaps (%rsi),%xmm6 + movaps 0x10(%rsi),%xmm7 + lea 0x28(%rsi),%rsi +___ +$code.=<<___; mov (%rsi),%r15 mov 8(%rsi),%r14 mov 16(%rsi),%r13 @@ -332,6 +346,7 @@ $code.=<<___; .align 16 bn_mul4x_mont_gather5: .Lmul4x_enter: + mov ${num}d,${num}d mov `($win64?56:8)`(%rsp),%r10d # load 7th argument push %rbx push %rbp @@ -339,10 +354,16 @@ bn_mul4x_mont_gather5: push %r13 push %r14 push %r15 - - mov ${num}d,${num}d +___ +$code.=<<___ if ($win64); + lea -0x28(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) +.Lmul4x_alloca: +___ +$code.=<<___; + mov %rsp,%rax lea 4($num),%r11 - mov %rsp,%rax # !!!! neg %r11 lea (%rsp,%r11,8),%rsp # tp=alloca(8*(num+4)) and \$-1024,%rsp # minimize TLB usage @@ -787,6 +808,13 @@ ___ $code.=<<___; mov 8(%rsp,$num,8),%rsi # restore %rsp mov \$1,%rax +___ +$code.=<<___ if ($win64); + movaps (%rsi),%xmm6 + movaps 0x10(%rsi),%xmm7 + lea 0x28(%rsi),%rsi +___ +$code.=<<___; mov (%rsi),%r15 mov 8(%rsi),%r14 mov 16(%rsi),%r13 @@ -828,6 +856,142 @@ $code.=<<___; .asciz "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by " ___ +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type mul_handler,\@abi-omnipotent +.align 16 +mul_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # end of prologue label + cmp %r10,%rbx # context->RipRipRsp + + mov 8(%r11),%r10d # HandlerData[2] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lcommon_seh_tail + + mov 192($context),%r10 # pull $num + mov 8(%rax,%r10,8),%rax # pull saved stack pointer + + movaps (%rax),%xmm0 + movaps 16(%rax),%xmm1 + lea `40+48`(%rax),%rax + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov -24(%rax),%r12 + mov -32(%rax),%r13 + mov -40(%rax),%r14 + mov -48(%rax),%r15 + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 + movups %xmm0,512($context) # restore context->Xmm6 + movups %xmm1,528($context) # restore context->Xmm7 + +.Lcommon_seh_tail: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size mul_handler,.-mul_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_bn_mul_mont_gather5 + .rva .LSEH_end_bn_mul_mont_gather5 + .rva .LSEH_info_bn_mul_mont_gather5 + + .rva .LSEH_begin_bn_mul4x_mont_gather5 + .rva .LSEH_end_bn_mul4x_mont_gather5 + .rva .LSEH_info_bn_mul4x_mont_gather5 + +.section .xdata +.align 8 +.LSEH_info_bn_mul_mont_gather5: + .byte 9,0,0,0 + .rva mul_handler + .rva .Lmul_alloca,.Lmul_body,.Lmul_epilogue # HandlerData[] +.align 8 +.LSEH_info_bn_mul4x_mont_gather5: + .byte 9,0,0,0 + .rva mul_handler + .rva .Lmul4x_alloca,.Lmul4x_body,.Lmul4x_epilogue # HandlerData[] +.align 8 +___ +} + $code =~ s/\`([^\`]*)\`/eval($1)/gem; print $code; -- 2.34.1