-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
$avx = ($1>=10) + ($1>=11);
}
-if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) {
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) {
$avx = ($2>=3.0) + ($2>3.0);
}
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
# void sha1_multi_block (
if (1) {
# Atom-specific optimization aiming to eliminate pshufb with high
- # registers [and thus get rid of 48 cycles accumulated penalty]
+ # registers [and thus get rid of 48 cycles accumulated penalty]
@Xi=map("%xmm$_",(0..4));
($tx,$t0,$t1,$t2,$t3)=map("%xmm$_",(5..9));
@V=($A,$B,$C,$D,$E)=map("%xmm$_",(10..14));
# ...
# $i==13: 14,15,15,15,
# $i==14: 15
-#
+#
# Then at $i==15 Xupdate is applied one iteration in advance...
$code.=<<___ if ($i==0);
movd (@ptr[0]),@Xi[0]
.type sha1_multi_block,\@function,3
.align 32
sha1_multi_block:
+.cfi_startproc
mov OPENSSL_ia32cap_P+4(%rip),%rcx
bt \$61,%rcx # check SHA bit
jc _shaext_shortcut
___
$code.=<<___;
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbx
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
sub \$`$REG_SZ*18`,%rsp
and \$-256,%rsp
mov %rax,`$REG_SZ*17`(%rsp) # original %rsp
+.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8
.Lbody:
lea K_XX_XX(%rip),$Tbl
lea `$REG_SZ*16`(%rsp),%rbx
jnz .Loop_grande
.Ldone:
- mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp
+ mov `$REG_SZ*17`(%rsp),%rax # original %rsp
+.cfi_def_cfa %rax,8
___
$code.=<<___ if ($win64);
movaps -0xb8(%rax),%xmm6
___
$code.=<<___;
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue:
ret
+.cfi_endproc
.size sha1_multi_block,.-sha1_multi_block
___
{{{
.type sha1_multi_block_shaext,\@function,3
.align 32
sha1_multi_block_shaext:
+.cfi_startproc
_shaext_shortcut:
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
movdqa K_XX_XX+0x80(%rip),$BSWAP # byte-n-word swap
.Loop_grande_shaext:
- mov $num,`$REG_SZ*17+8`(%rsp) # orignal $num
+ mov $num,`$REG_SZ*17+8`(%rsp) # original $num
xor $num,$num
___
for($i=0;$i<2;$i++) {
___
$code.=<<___;
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_shaext:
ret
+.cfi_endproc
.size sha1_multi_block_shaext,.-sha1_multi_block_shaext
___
}}}
.type sha1_multi_block_avx,\@function,3
.align 32
sha1_multi_block_avx:
+.cfi_startproc
_avx_shortcut:
___
$code.=<<___ if ($avx>1);
___
$code.=<<___;
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
sub \$`$REG_SZ*18`, %rsp
and \$-256,%rsp
mov %rax,`$REG_SZ*17`(%rsp) # original %rsp
+.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8
.Lbody_avx:
lea K_XX_XX(%rip),$Tbl
lea `$REG_SZ*16`(%rsp),%rbx
jnz .Loop_grande_avx
.Ldone_avx:
- mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp
+ mov `$REG_SZ*17`(%rsp),%rax # original %rsp
+.cfi_def_cfa %rax,8
vzeroupper
___
$code.=<<___ if ($win64);
___
$code.=<<___;
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx:
ret
+.cfi_endproc
.size sha1_multi_block_avx,.-sha1_multi_block_avx
___
.type sha1_multi_block_avx2,\@function,3
.align 32
sha1_multi_block_avx2:
+.cfi_startproc
_avx2_shortcut:
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
sub \$`$REG_SZ*18`, %rsp
and \$-256,%rsp
mov %rax,`$REG_SZ*17`(%rsp) # original %rsp
+.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8
.Lbody_avx2:
lea K_XX_XX(%rip),$Tbl
shr \$1,$num
#jnz .Loop_grande_avx2
.Ldone_avx2:
- mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp
+ mov `$REG_SZ*17`(%rsp),%rax # original %rsp
+.cfi_def_cfa %rax,8
vzeroupper
___
$code.=<<___ if ($win64);
___
$code.=<<___;
mov -48(%rax),%r15
+.cfi_restore %r15
mov -40(%rax),%r14
+.cfi_restore %r14
mov -32(%rax),%r13
+.cfi_restore %r13
mov -24(%rax),%r12
+.cfi_restore %r12
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
ret
+.cfi_endproc
.size sha1_multi_block_avx2,.-sha1_multi_block_avx2
___
} }}}