#! /usr/bin/env perl
# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
#
-# Licensed under the OpenSSL license (the "License"). You may not use
+# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
# Haswell 5.45 4.15/+31% 3.57/+53%
# Skylake 5.18 4.06/+28% 3.54/+46%
# Bulldozer 9.11 5.95/+53%
+# Ryzen 4.75 3.80/+24% 1.93/+150%(**)
# VIA Nano 9.32 7.15/+30%
# Atom 10.3 9.17/+12%
# Silvermont 13.1(*) 9.37/+40%
+# Knights L 13.2(*) 9.68/+36% 8.30/+59%
# Goldmont 8.13 6.42/+27% 1.70/+380%(**)
#
# (*) obviously suboptimal result, nothing was done about it,
# because SSSE3 code is compiled unconditionally;
# (**) SHAEXT result
-$flavour = shift;
-$output = shift;
-if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+# $output is the last argument if it looks like a file (it has an extension)
+# $flavour is the first argument if it doesn't look like a file
+$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
+$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
$shaext=1; ### set to zero if compiling for 1.0.1
$avx=1 if (!$shaext && $avx);
-open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
+ or die "can't call $xlate: $!";
*STDOUT=*OUT;
$ctx="%rdi"; # 1st arg
.type sha1_block_data_order,\@function,3
.align 16
sha1_block_data_order:
+.cfi_startproc
mov OPENSSL_ia32cap_P+0(%rip),%r9d
mov OPENSSL_ia32cap_P+4(%rip),%r8d
mov OPENSSL_ia32cap_P+8(%rip),%r10d
.align 16
.Lialu:
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
mov %rdi,$ctx # reassigned argument
sub \$`8+16*4`,%rsp
mov %rsi,$inp # reassigned argument
and \$-64,%rsp
mov %rdx,$num # reassigned argument
mov %rax,`16*4`(%rsp)
+.cfi_cfa_expression %rsp+64,deref,+8
.Lprologue:
mov 0($ctx),$A
jnz .Lloop
mov `16*4`(%rsp),%rsi
+.cfi_def_cfa %rsi,8
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue:
ret
+.cfi_endproc
.size sha1_block_data_order,.-sha1_block_data_order
___
if ($shaext) {{{
.align 32
sha1_block_data_order_shaext:
_shaext_shortcut:
+.cfi_startproc
___
$code.=<<___ if ($win64);
lea `-8-4*16`(%rsp),%rsp
___
$code.=<<___;
ret
+.cfi_endproc
.size sha1_block_data_order_shaext,.-sha1_block_data_order_shaext
___
}}}
.align 16
sha1_block_data_order_ssse3:
_ssse3_shortcut:
+.cfi_startproc
mov %rsp,$fp # frame pointer
+.cfi_def_cfa_register $fp
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13 # redundant, done to share Win64 SE handler
+.cfi_push %r13
push %r14
+.cfi_push %r14
lea `-64-($win64?6*16:0)`(%rsp),%rsp
___
$code.=<<___ if ($win64);
$code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n";
}
-sub Xupdate_ssse3_16_31() # recall that $Xi starts wtih 4
+sub Xupdate_ssse3_16_31() # recall that $Xi starts with 4
{ use integer;
my $body = shift;
my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
___
$code.=<<___;
mov -40($fp),%r14
+.cfi_restore %r14
mov -32($fp),%r13
+.cfi_restore %r13
mov -24($fp),%r12
+.cfi_restore %r12
mov -16($fp),%rbp
+.cfi_restore %rbp
mov -8($fp),%rbx
+.cfi_restore %rbx
lea ($fp),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_ssse3:
ret
+.cfi_endproc
.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
___
.align 16
sha1_block_data_order_avx:
_avx_shortcut:
+.cfi_startproc
mov %rsp,$fp
+.cfi_def_cfa_register $fp
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13 # redundant, done to share Win64 SE handler
+.cfi_push %r13
push %r14
+.cfi_push %r14
lea `-64-($win64?6*16:0)`(%rsp),%rsp
vzeroupper
___
jmp .Loop_avx
___
-sub Xupdate_avx_16_31() # recall that $Xi starts wtih 4
+sub Xupdate_avx_16_31() # recall that $Xi starts with 4
{ use integer;
my $body = shift;
my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
___
$code.=<<___;
mov -40($fp),%r14
+.cfi_restore %r14
mov -32($fp),%r13
+.cfi_restore %r13
mov -24($fp),%r12
+.cfi_restore %r12
mov -16($fp),%rbp
+.cfi_restore %rbp
mov -8($fp),%rbx
+.cfi_restore %rbx
lea ($fp),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx:
ret
+.cfi_endproc
.size sha1_block_data_order_avx,.-sha1_block_data_order_avx
___
.align 16
sha1_block_data_order_avx2:
_avx2_shortcut:
+.cfi_startproc
mov %rsp,$fp
+.cfi_def_cfa_register $fp
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
vzeroupper
___
$code.=<<___ if ($win64);
)
}
-sub Xupdate_avx2_16_31() # recall that $Xi starts wtih 4
+sub Xupdate_avx2_16_31() # recall that $Xi starts with 4
{ use integer;
my $body = shift;
my @insns = (&$body,&$body,&$body,&$body,&$body); # 35 instructions
___
$code.=<<___;
mov -40($fp),%r14
+.cfi_restore %r14
mov -32($fp),%r13
+.cfi_restore %r13
mov -24($fp),%r12
+.cfi_restore %r12
mov -16($fp),%rbp
+.cfi_restore %rbp
mov -8($fp),%rbx
+.cfi_restore %rbx
lea ($fp),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
ret
+.cfi_endproc
.size sha1_block_data_order_avx2,.-sha1_block_data_order_avx2
___
}
mov -40(%rax),%r14
mov %rbx,144($context) # restore context->Rbx
mov %rbp,160($context) # restore context->Rbp
- mov %r12,216($context) # restore cotnext->R12
- mov %r13,224($context) # restore cotnext->R13
- mov %r14,232($context) # restore cotnext->R14
+ mov %r12,216($context) # restore context->R12
+ mov %r13,224($context) # restore context->R13
+ mov %r14,232($context) # restore context->R14
.Lcommon_seh_tail:
mov 8(%rax),%rdi