s390x assembler update: add support for run-time facility detection [from HEAD].
[openssl.git] / crypto / sha / asm / sha512-x86_64.pl
index 38157b610c94c65cc608e2e35152e91b80f588a0..e6643f8cf613d2addb4591f60c0962bacd6c28db 100755 (executable)
 # sha256_block:-( This is presumably because 64-bit shifts/rotates
 # apparently are not atomic instructions, but implemented in microcode.
 
-$output=shift;
-open STDOUT,"| $^X ../perlasm/x86_64-xlate.pl $output";
+$flavour = shift;      # 1st command-line argument: translator flavour (optional)
+$output  = shift;      # 2nd command-line argument: output file name
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }  # a lone argument containing a dot is taken as the output name
+
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);  # Win64 when flavour is nasm/masm/mingw64 or output ends in .asm
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;  # directory part of this script's own path, trailing separator included
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or                # first look for the translator next to this script
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or   # then under ../../perlasm relative to it
+die "can't locate x86_64-xlate.pl";
+
+open STDOUT,"| $^X $xlate $flavour $output";  # everything printed from here on is piped through the translator, run with the current perl
 
 if ($output =~ /512/) {
-       $func="sha512_block";
+       $func="sha512_block_data_order";
        $TABLE="K512";
        $SZ=8;
        @ROT=($A,$B,$C,$D,$E,$F,$G,$H)=("%rax","%rbx","%rcx","%rdx",
@@ -56,7 +66,7 @@ if ($output =~ /512/) {
        @sigma1=(19,61, 6);
        $rounds=80;
 } else {
-       $func="sha256_block";
+       $func="sha256_block_data_order";
        $TABLE="K256";
        $SZ=4;
        @ROT=($A,$B,$C,$D,$E,$F,$G,$H)=("%eax","%ebx","%ecx","%edx",
@@ -77,9 +87,8 @@ $Tbl="%rbp";
 $_ctx="16*$SZ+0*8(%rsp)";
 $_inp="16*$SZ+1*8(%rsp)";
 $_end="16*$SZ+2*8(%rsp)";
-$_ord="16*$SZ+3*8(%rsp)";
-$_rsp="16*$SZ+4*8(%rsp)";
-$framesz="16*$SZ+5*8";
+$_rsp="16*$SZ+3*8(%rsp)";
+$framesz="16*$SZ+4*8";
 
 
 sub ROUND_00_15()
@@ -181,7 +190,7 @@ $func:
        push    %r13
        push    %r14
        push    %r15
-       mov     %rsp,%rbp               # copy %rsp
+       mov     %rsp,%r11               # copy %rsp
        shl     \$4,%rdx                # num*16
        sub     \$$framesz,%rsp
        lea     ($inp,%rdx,$SZ),%rdx    # inp+num*16*$SZ
@@ -189,11 +198,10 @@ $func:
        mov     $ctx,$_ctx              # save ctx, 1st arg
        mov     $inp,$_inp              # save inp, 2nd arg
        mov     %rdx,$_end              # save end pointer, "3rd" arg
-       mov     %ecx,$_ord              # save host, 4th arg
-       mov     %rbp,$_rsp              # save copy of %rsp
+       mov     %r11,$_rsp              # save copy of %rsp
+.Lprologue:
 
-       .picmeup $Tbl
-       lea     $TABLE-.($Tbl),$Tbl
+       lea     $TABLE(%rip),$Tbl
 
        mov     $SZ*0($ctx),$A
        mov     $SZ*1($ctx),$B
@@ -209,25 +217,6 @@ $func:
 .Lloop:
        xor     $round,$round
 ___
-if ($SZ==4) {
-$code.=<<___;
-       cmpl    \$0,$_ord
-       je      .Ldata_order
-.align 16
-.Lhost_order:
-___
-
-       for($i=0;$i<16;$i++) {
-               $code.="        mov     $SZ*$i($inp),$T1\n";
-               &ROUND_00_15($i,@ROT);
-               unshift(@ROT,pop(@ROT));
-       }
-$code.=<<___;
-       jmp     .Lrounds_16_xx
-.align 16
-.Ldata_order:
-___
-} # 256
        for($i=0;$i<16;$i++) {
                $code.="        mov     $SZ*$i($inp),$T1\n";
                $code.="        bswap   $T1\n";
@@ -272,14 +261,15 @@ $code.=<<___;
        mov     $H,$SZ*7($ctx)
        jb      .Lloop
 
-       mov     $_rsp,%rsp
-       pop     %r15
-       pop     %r14
-       pop     %r13
-       pop     %r12
-       pop     %rbp
-       pop     %rbx
-
+       mov     $_rsp,%rsi
+       mov     (%rsi),%r15
+       mov     8(%rsi),%r14
+       mov     16(%rsi),%r13
+       mov     24(%rsi),%r12
+       mov     32(%rsi),%rbp
+       mov     40(%rsi),%rbx
+       lea     48(%rsi),%rsp
+.Lepilogue:
        ret
 .size  $func,.-$func
 ___
@@ -354,6 +344,113 @@ $TABLE:
 ___
 }
 
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+#              CONTEXT *context,DISPATCHER_CONTEXT *disp)
+if ($win64) {  # the SEH handler and its .pdata/.xdata records are emitted for Win64 targets only
+$rec="%rcx";           # arg1 register: EXCEPTION_RECORD *rec
+$frame="%rdx";         # arg2 register: ULONG64 frame
+$context="%r8";        # arg3 register: CONTEXT *context
+$disp="%r9";           # arg4 register: DISPATCHER_CONTEXT *disp
+
+$code.=<<___;
+.extern        __imp_RtlVirtualUnwind
+.type  se_handler,\@abi-omnipotent
+.align 16
+se_handler:
+       push    %rsi                    # preserve the Win64 callee-saved registers, all are used as scratch below
+       push    %rdi
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       pushfq                          # preserve flags, the cld below changes DF
+       sub     \$64,%rsp               # 32 bytes of shadow space plus four stack arguments for the call below
+
+       mov     120($context),%rax      # pull context->Rax, used as frame base while in prologue; NOTE(review): the prologue copies %rsp into %r11, not %rax - confirm
+       mov     248($context),%rbx      # pull context->Rip
+
+       lea     .Lprologue(%rip),%r10
+       cmp     %r10,%rbx               # context->Rip<.Lprologue
+       jb      .Lin_prologue           # frame not established yet, skip the register recovery
+
+       mov     152($context),%rax      # pull context->Rsp
+
+       lea     .Lepilogue(%rip),%r10
+       cmp     %r10,%rbx               # context->Rip>=.Lepilogue
+       jae     .Lin_prologue           # registers and %rsp already restored by the epilogue
+
+       mov     16*$SZ+3*8(%rax),%rax   # pull $_rsp
+       lea     48(%rax),%rax           # step over the six 8-byte saved-register slots
+
+       mov     -8(%rax),%rbx           # reload the six saved registers from the frame
+       mov     -16(%rax),%rbp
+       mov     -24(%rax),%r12
+       mov     -32(%rax),%r13
+       mov     -40(%rax),%r14
+       mov     -48(%rax),%r15
+       mov     %rbx,144($context)      # restore context->Rbx
+       mov     %rbp,160($context)      # restore context->Rbp
+       mov     %r12,216($context)      # restore context->R12
+       mov     %r13,224($context)      # restore context->R13
+       mov     %r14,232($context)      # restore context->R14
+       mov     %r15,240($context)      # restore context->R15
+
+.Lin_prologue:
+       mov     8(%rax),%rdi            # presumably the rdi/rsi copies stored above the return address by the translated Win64 prologue - confirm against the translator
+       mov     16(%rax),%rsi
+       mov     %rax,152($context)      # restore context->Rsp
+       mov     %rsi,168($context)      # restore context->Rsi
+       mov     %rdi,176($context)      # restore context->Rdi
+
+       mov     40($disp),%rdi          # disp->ContextRecord
+       mov     $context,%rsi           # context
+       mov     \$154,%ecx              # sizeof(CONTEXT) in quadwords, the unit rep movsq counts in
+       .long   0xa548f3fc              # cld; rep movsq
+
+       mov     $disp,%rsi              # copy disp, %r9 is overwritten as arg4 below
+       xor     %rcx,%rcx               # arg1, UNW_FLAG_NHANDLER
+       mov     8(%rsi),%rdx            # arg2, disp->ImageBase
+       mov     0(%rsi),%r8             # arg3, disp->ControlPc
+       mov     16(%rsi),%r9            # arg4, disp->FunctionEntry
+       mov     40(%rsi),%r10           # disp->ContextRecord
+       lea     56(%rsi),%r11           # &disp->HandlerData
+       lea     24(%rsi),%r12           # &disp->EstablisherFrame
+       mov     %r10,32(%rsp)           # arg5
+       mov     %r11,40(%rsp)           # arg6
+       mov     %r12,48(%rsp)           # arg7
+       mov     %rcx,56(%rsp)           # arg8, (NULL)
+       call    *__imp_RtlVirtualUnwind(%rip)   # indirect call through the pointer stored at __imp_RtlVirtualUnwind
+
+       mov     \$1,%eax                # ExceptionContinueSearch
+       add     \$64,%rsp               # release the argument area reserved on entry
+       popfq
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       pop     %rdi
+       pop     %rsi
+       ret
+.size  se_handler,.-se_handler
+
+.section       .pdata
+.align 4
+       .rva    .LSEH_begin_$func
+       .rva    .LSEH_end_$func
+       .rva    .LSEH_info_$func
+
+.section       .xdata
+.align 8
+.LSEH_info_$func:
+       .byte   9,0,0,0
+       .rva    se_handler
+___
+}
+
 $code =~ s/\`([^\`]*)\`/eval $1/gem;
 print $code;
 close STDOUT;