-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2005-2020 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. Rights for redistribution and usage in source and binary
-# forms are granted according to the OpenSSL license.
+# forms are granted according to the License.
# ====================================================================
#
# whirlpool_block for x86_64.
# 3 on Opteron] and which is *unacceptably* slow with 64-bit
# operand.
-$output=shift;
-open STDOUT,"| $^X ../perlasm/x86_64-xlate.pl $output";
+# $output is the last argument if it looks like a file (it has an extension)
+# $flavour is the first argument if it doesn't look like a file
+$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;  # last arg ends in ".ext": take it as the output name
+$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;  # first arg contains no dot: take it as the flavour
+
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);  # Win64 is selected by flavour name or by a ".asm" output name
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate;  # $dir: directory prefix of this script's path, trailing separator included
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or  # look for the translator next to this script first,
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or  # then under ../../perlasm relative to it
+die "can't locate x86_64-xlate.pl";
+
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""  # pipe into the translator, run with the current perl ($^X)
+ or die "can't call $xlate: $!";
+*STDOUT=*OUT;  # alias STDOUT to the pipe so plain print feeds the translator
# Append one ".byte" directive to $code, listing the arguments
# comma-separated.  Returns the updated $code.
sub L() {
    my $bytes = join(',', @_);
    $code .= ".byte $bytes\n";
}
# Append one ".byte" directive to $code in which the comma-separated
# argument list appears twice in a row.  Returns the updated $code.
sub LL(){
    my $half = join(',', @_);
    $code .= ".byte $half,$half\n";
}
.type $func,\@function,3
.align 16
$func:
+.cfi_startproc
+ mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
- mov %rsp,%rax
sub \$128+40,%rsp
and \$-64,%rsp
- lea 128(%rsp),%rbx
- mov %rdi,0(%rbx) # save parameter block
- mov %rsi,8(%rbx)
- mov %rdx,16(%rbx)
- mov %rax,32(%rbx) # saved stack pointer
+ lea 128(%rsp),%r10
+ mov %rdi,0(%r10) # save parameter block
+ mov %rsi,8(%r10)
+ mov %rdx,16(%r10)
+ mov %rax,32(%r10) # saved stack pointer
+.cfi_cfa_expression %rsp+`128+32`,deref,+8
+.Lprologue:
- .picmeup %rbp
- lea $table-.(%rbp),%rbp
+ mov %r10,%rbx
+ lea $table(%rip),%rbp
xor %rcx,%rcx
xor %rdx,%rdx
$code.=<<___;
xor %rsi,%rsi
mov %rsi,24(%rbx) # zero round counter
+ jmp .Lround
.align 16
.Lround:
mov 4096(%rbp,%rsi,8),@mm[0] # rc[r]
mov 0(%rsp),%eax
mov 4(%rsp),%ebx
+ movz %al,%ecx
+ movz %ah,%edx
___
for($i=0;$i<8;$i++) {
my $func = ($i==0)? "mov" : "xor";
$code.=<<___;
- mov %al,%cl
- mov %ah,%dl
+ shr \$16,%eax
lea (%rcx,%rcx),%rsi
+ movz %al,%ecx
lea (%rdx,%rdx),%rdi
- shr \$16,%eax
+ movz %ah,%edx
xor 0(%rbp,%rsi,8),@mm[0]
$func 7(%rbp,%rdi,8),@mm[1]
- mov %al,%cl
- mov %ah,%dl
mov $i*8+8(%rsp),%eax # ($i+1)*8
lea (%rcx,%rcx),%rsi
+ movz %bl,%ecx
lea (%rdx,%rdx),%rdi
+ movz %bh,%edx
$func 6(%rbp,%rsi,8),@mm[2]
$func 5(%rbp,%rdi,8),@mm[3]
- mov %bl,%cl
- mov %bh,%dl
+ shr \$16,%ebx
lea (%rcx,%rcx),%rsi
+ movz %bl,%ecx
lea (%rdx,%rdx),%rdi
- shr \$16,%ebx
+ movz %bh,%edx
$func 4(%rbp,%rsi,8),@mm[4]
$func 3(%rbp,%rdi,8),@mm[5]
- mov %bl,%cl
- mov %bh,%dl
mov $i*8+8+4(%rsp),%ebx # ($i+1)*8+4
lea (%rcx,%rcx),%rsi
+ movz %al,%ecx
lea (%rdx,%rdx),%rdi
+ movz %ah,%edx
$func 2(%rbp,%rsi,8),@mm[6]
$func 1(%rbp,%rdi,8),@mm[7]
___
for($i=0;$i<8;$i++) { $code.="mov @mm[$i],$i*8(%rsp)\n"; } # K=L: emit eight stores, @mm[$i] -> $i*8(%rsp)
for($i=0;$i<8;$i++) {
$code.=<<___;
- mov %al,%cl
- mov %ah,%dl
+ shr \$16,%eax
lea (%rcx,%rcx),%rsi
+ movz %al,%ecx
lea (%rdx,%rdx),%rdi
- shr \$16,%eax
+ movz %ah,%edx
xor 0(%rbp,%rsi,8),@mm[0]
xor 7(%rbp,%rdi,8),@mm[1]
- mov %al,%cl
- mov %ah,%dl
`"mov 64+$i*8+8(%rsp),%eax" if($i<7);` # 64+($i+1)*8
lea (%rcx,%rcx),%rsi
+ movz %bl,%ecx
lea (%rdx,%rdx),%rdi
+ movz %bh,%edx
xor 6(%rbp,%rsi,8),@mm[2]
xor 5(%rbp,%rdi,8),@mm[3]
- mov %bl,%cl
- mov %bh,%dl
+ shr \$16,%ebx
lea (%rcx,%rcx),%rsi
+ movz %bl,%ecx
lea (%rdx,%rdx),%rdi
- shr \$16,%ebx
+ movz %bh,%edx
xor 4(%rbp,%rsi,8),@mm[4]
xor 3(%rbp,%rdi,8),@mm[5]
- mov %bl,%cl
- mov %bh,%dl
`"mov 64+$i*8+8+4(%rsp),%ebx" if($i<7);` # 64+($i+1)*8+4
lea (%rcx,%rcx),%rsi
+ movz %al,%ecx
lea (%rdx,%rdx),%rdi
+ movz %ah,%edx
xor 2(%rbp,%rsi,8),@mm[6]
xor 1(%rbp,%rdi,8),@mm[7]
___
mov %rax,16(%rbx)
jmp .Louterloop
.Lalldone:
- mov 32(%rbx),%rsp # restore saved pointer
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- pop %rbp
- pop %rbx
+ mov 32(%rbx),%rsi # restore saved pointer
+.cfi_def_cfa %rsi,8
+ mov -48(%rsi),%r15
+.cfi_restore %r15
+ mov -40(%rsi),%r14
+.cfi_restore %r14
+ mov -32(%rsi),%r13
+.cfi_restore %r13
+ mov -24(%rsi),%r12
+.cfi_restore %r12
+ mov -16(%rsi),%rbp
+.cfi_restore %rbp
+ mov -8(%rsi),%rbx
+.cfi_restore %rbx
+ lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
+.Lepilogue:
ret
+.cfi_endproc
.size $func,.-$func
+.section .rodata align=64
.align 64
.type $table,\@object
$table:
&L(0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e);
&L(0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33);
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+# CONTEXT *context,DISPATCHER_CONTEXT *disp)
+if ($win64) {  # Win64 only: emit se_handler plus the .pdata/.xdata records that reference it
+$rec="%rcx";  # handler argument 1 (rec)
+$frame="%rdx";  # handler argument 2 (frame)
+$context="%r8";  # handler argument 3 (context)
+$disp="%r9";  # handler argument 4 (disp)
+
+$code.=<<___;
+.extern __imp_RtlVirtualUnwind
+.type se_handler,\@abi-omnipotent
+.align 16
+se_handler:
+ push %rsi
+ push %rdi
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ pushfq
+ sub \$64,%rsp
+
+ mov 120($context),%rax # pull context->Rax
+ mov 248($context),%rbx # pull context->Rip
+
+ lea .Lprologue(%rip),%r10
+ cmp %r10,%rbx # context->Rip<.Lprologue
+ jb .Lin_prologue
+
+ mov 152($context),%rax # pull context->Rsp
+
+ lea .Lepilogue(%rip),%r10
+ cmp %r10,%rbx # context->Rip>=.Lepilogue
+ jae .Lin_prologue
+
+ mov 128+32(%rax),%rax # pull saved stack pointer
+
+ mov -8(%rax),%rbx
+ mov -16(%rax),%rbp
+ mov -24(%rax),%r12
+ mov -32(%rax),%r13
+ mov -40(%rax),%r14
+ mov -48(%rax),%r15
+ mov %rbx,144($context) # restore context->Rbx
+ mov %rbp,160($context) # restore context->Rbp
+ mov %r12,216($context) # restore context->R12
+ mov %r13,224($context) # restore context->R13
+ mov %r14,232($context) # restore context->R14
+ mov %r15,240($context) # restore context->R15
+
+.Lin_prologue:
+ mov 8(%rax),%rdi
+ mov 16(%rax),%rsi
+ mov %rax,152($context) # restore context->Rsp
+ mov %rsi,168($context) # restore context->Rsi
+ mov %rdi,176($context) # restore context->Rdi
+
+ mov 40($disp),%rdi # disp->ContextRecord
+ mov $context,%rsi # context
+ mov \$154,%ecx # sizeof(CONTEXT)
+ .long 0xa548f3fc # cld; rep movsq
+
+ mov $disp,%rsi
+ xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
+ mov 8(%rsi),%rdx # arg2, disp->ImageBase
+ mov 0(%rsi),%r8 # arg3, disp->ControlPc
+ mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
+ mov 40(%rsi),%r10 # disp->ContextRecord
+ lea 56(%rsi),%r11 # &disp->HandlerData
+ lea 24(%rsi),%r12 # &disp->EstablisherFrame
+ mov %r10,32(%rsp) # arg5
+ mov %r11,40(%rsp) # arg6
+ mov %r12,48(%rsp) # arg7
+ mov %rcx,56(%rsp) # arg8, (NULL)
+ call *__imp_RtlVirtualUnwind(%rip)
+
+ mov \$1,%eax # ExceptionContinueSearch
+ add \$64,%rsp
+ popfq
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ pop %rdi
+ pop %rsi
+ ret
+.size se_handler,.-se_handler
+
+.section .pdata
+.align 4
+ .rva .LSEH_begin_$func
+ .rva .LSEH_end_$func
+ .rva .LSEH_info_$func
+
+.section .xdata
+.align 8
+.LSEH_info_$func:
+ .byte 9,0,0,0
+ .rva se_handler
+___
+}
+
$code =~ s/\`([^\`]*)\`/eval $1/gem;  # replace each `...` span in $code with the result of eval'ing its contents as Perl
print $code;  # write the generated text to STDOUT
-close STDOUT;
+close STDOUT or die "error closing STDOUT: $!";  # a failed close is fatal rather than silently ignored