projects
/
openssl.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
sha/asm/sha512p8-ppc.pl: add POWER8 performance data.
[openssl.git]
/
crypto
/
sha
/
asm
/
sha1-mb-x86_64.pl
diff --git
a/crypto/sha/asm/sha1-mb-x86_64.pl
b/crypto/sha/asm/sha1-mb-x86_64.pl
index 099c803ebc41e76ef6c90aeb209d31fd1687e6c7..56e15292a78074ba5fd215aafc231a7988c83642 100644
(file)
--- a/
crypto/sha/asm/sha1-mb-x86_64.pl
+++ b/
crypto/sha/asm/sha1-mb-x86_64.pl
@@
-1,4
+1,11
@@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@
-59,11
+66,11
@@
if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
$avx = ($1>=10) + ($1>=11);
}
$avx = ($1>=10) + ($1>=11);
}
-if (!$avx && `$ENV{CC} -v 2>&1` =~ /(
^clang version|
based on LLVM) ([3-9]\.[0-9]+)/) {
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /(
(?:^clang|LLVM) version|.*
based on LLVM) ([3-9]\.[0-9]+)/) {
$avx = ($2>=3.0) + ($2>3.0);
}
$avx = ($2>=3.0) + ($2>3.0);
}
-open OUT,"| \"$^X\"
$xlate $flavour $output
";
+open OUT,"| \"$^X\"
\"$xlate\" $flavour \"$output\"
";
*STDOUT=*OUT;
# void sha1_multi_block (
*STDOUT=*OUT;
# void sha1_multi_block (
@@
-88,7
+95,7
@@
$K="%xmm15";
if (1) {
# Atom-specific optimization aiming to eliminate pshufb with high
if (1) {
# Atom-specific optimization aiming to eliminate pshufb with high
- # registers [and thus get rid of 48 cycles accumulated penalty]
+ # registers [and thus get rid of 48 cycles accumulated penalty]
@Xi=map("%xmm$_",(0..4));
($tx,$t0,$t1,$t2,$t3)=map("%xmm$_",(5..9));
@V=($A,$B,$C,$D,$E)=map("%xmm$_",(10..14));
@Xi=map("%xmm$_",(0..4));
($tx,$t0,$t1,$t2,$t3)=map("%xmm$_",(5..9));
@V=($A,$B,$C,$D,$E)=map("%xmm$_",(10..14));
@@
-119,7
+126,7
@@
my $k=$i+2;
# ...
# $i==13: 14,15,15,15,
# $i==14: 15
# ...
# $i==13: 14,15,15,15,
# $i==14: 15
-#
+#
# Then at $i==15 Xupdate is applied one iteration in advance...
$code.=<<___ if ($i==0);
movd (@ptr[0]),@Xi[0]
# Then at $i==15 Xupdate is applied one iteration in advance...
$code.=<<___ if ($i==0);
movd (@ptr[0]),@Xi[0]
@@
-356,6
+363,7
@@
$code.=<<___;
.type sha1_multi_block,\@function,3
.align 32
sha1_multi_block:
.type sha1_multi_block,\@function,3
.align 32
sha1_multi_block:
+.cfi_startproc
mov OPENSSL_ia32cap_P+4(%rip),%rcx
bt \$61,%rcx # check SHA bit
jc _shaext_shortcut
mov OPENSSL_ia32cap_P+4(%rip),%rcx
bt \$61,%rcx # check SHA bit
jc _shaext_shortcut
@@
-366,8
+374,11
@@
$code.=<<___ if ($avx);
___
$code.=<<___;
mov %rsp,%rax
___
$code.=<<___;
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
push %rbx
+.cfi_push %rbx
push %rbp
push %rbp
+.cfi_push %rbp
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
@@
-386,6
+397,7
@@
$code.=<<___;
sub \$`$REG_SZ*18`,%rsp
and \$-256,%rsp
mov %rax,`$REG_SZ*17`(%rsp) # original %rsp
sub \$`$REG_SZ*18`,%rsp
and \$-256,%rsp
mov %rax,`$REG_SZ*17`(%rsp) # original %rsp
+.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8
.Lbody:
lea K_XX_XX(%rip),$Tbl
lea `$REG_SZ*16`(%rsp),%rbx
.Lbody:
lea K_XX_XX(%rip),$Tbl
lea `$REG_SZ*16`(%rsp),%rbx
@@
-479,7
+491,8
@@
$code.=<<___;
jnz .Loop_grande
.Ldone:
jnz .Loop_grande
.Ldone:
- mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp
+ mov `$REG_SZ*17`(%rsp),%rax # original %rsp
+.cfi_def_cfa %rax,8
___
$code.=<<___ if ($win64);
movaps -0xb8(%rax),%xmm6
___
$code.=<<___ if ($win64);
movaps -0xb8(%rax),%xmm6
@@
-495,10
+508,14
@@
$code.=<<___ if ($win64);
___
$code.=<<___;
mov -16(%rax),%rbp
___
$code.=<<___;
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp
lea (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue:
ret
.Lepilogue:
ret
+.cfi_endproc
.size sha1_multi_block,.-sha1_multi_block
___
{{{
.size sha1_multi_block,.-sha1_multi_block
___
{{{
@@
-510,10
+527,14
@@
$code.=<<___;
.type sha1_multi_block_shaext,\@function,3
.align 32
sha1_multi_block_shaext:
.type sha1_multi_block_shaext,\@function,3
.align 32
sha1_multi_block_shaext:
+.cfi_startproc
_shaext_shortcut:
mov %rsp,%rax
_shaext_shortcut:
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
push %rbx
+.cfi_push %rbx
push %rbp
push %rbp
+.cfi_push %rbp
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
@@
-539,7
+560,7
@@
$code.=<<___;
movdqa K_XX_XX+0x80(%rip),$BSWAP # byte-n-word swap
.Loop_grande_shaext:
movdqa K_XX_XX+0x80(%rip),$BSWAP # byte-n-word swap
.Loop_grande_shaext:
- mov $num,`$REG_SZ*17+8`(%rsp) # orignal $num
+ mov $num,`$REG_SZ*17+8`(%rsp) # original $num
xor $num,$num
___
for($i=0;$i<2;$i++) {
xor $num,$num
___
for($i=0;$i<2;$i++) {
@@
-749,10
+770,14
@@
$code.=<<___ if ($win64);
___
$code.=<<___;
mov -16(%rax),%rbp
___
$code.=<<___;
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp
lea (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_shaext:
ret
.Lepilogue_shaext:
ret
+.cfi_endproc
.size sha1_multi_block_shaext,.-sha1_multi_block_shaext
___
}}}
.size sha1_multi_block_shaext,.-sha1_multi_block_shaext
___
}}}
@@
-995,6
+1020,7
@@
$code.=<<___;
.type sha1_multi_block_avx,\@function,3
.align 32
sha1_multi_block_avx:
.type sha1_multi_block_avx,\@function,3
.align 32
sha1_multi_block_avx:
+.cfi_startproc
_avx_shortcut:
___
$code.=<<___ if ($avx>1);
_avx_shortcut:
___
$code.=<<___ if ($avx>1);
@@
-1009,8
+1035,11
@@
$code.=<<___ if ($avx>1);
___
$code.=<<___;
mov %rsp,%rax
___
$code.=<<___;
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
push %rbx
+.cfi_push %rbx
push %rbp
push %rbp
+.cfi_push %rbp
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
@@
-1029,6
+1058,7
@@
$code.=<<___;
sub \$`$REG_SZ*18`, %rsp
and \$-256,%rsp
mov %rax,`$REG_SZ*17`(%rsp) # original %rsp
sub \$`$REG_SZ*18`, %rsp
and \$-256,%rsp
mov %rax,`$REG_SZ*17`(%rsp) # original %rsp
+.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8
.Lbody_avx:
lea K_XX_XX(%rip),$Tbl
lea `$REG_SZ*16`(%rsp),%rbx
.Lbody_avx:
lea K_XX_XX(%rip),$Tbl
lea `$REG_SZ*16`(%rsp),%rbx
@@
-1117,7
+1147,8
@@
$code.=<<___;
jnz .Loop_grande_avx
.Ldone_avx:
jnz .Loop_grande_avx
.Ldone_avx:
- mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp
+ mov `$REG_SZ*17`(%rsp),%rax # original %rsp
+.cfi_def_cfa %rax,8
vzeroupper
___
$code.=<<___ if ($win64);
vzeroupper
___
$code.=<<___ if ($win64);
@@
-1134,10
+1165,14
@@
$code.=<<___ if ($win64);
___
$code.=<<___;
mov -16(%rax),%rbp
___
$code.=<<___;
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp
lea (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx:
ret
.Lepilogue_avx:
ret
+.cfi_endproc
.size sha1_multi_block_avx,.-sha1_multi_block_avx
___
.size sha1_multi_block_avx,.-sha1_multi_block_avx
___
@@
-1157,14
+1192,22
@@
$code.=<<___;
.type sha1_multi_block_avx2,\@function,3
.align 32
sha1_multi_block_avx2:
.type sha1_multi_block_avx2,\@function,3
.align 32
sha1_multi_block_avx2:
+.cfi_startproc
_avx2_shortcut:
mov %rsp,%rax
_avx2_shortcut:
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
push %rbx
+.cfi_push %rbx
push %rbp
push %rbp
+.cfi_push %rbp
push %r12
push %r12
+.cfi_push %r12
push %r13
push %r13
+.cfi_push %r13
push %r14
push %r14
+.cfi_push %r14
push %r15
push %r15
+.cfi_push %r15
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
@@
-1183,6
+1226,7
@@
$code.=<<___;
sub \$`$REG_SZ*18`, %rsp
and \$-256,%rsp
mov %rax,`$REG_SZ*17`(%rsp) # original %rsp
sub \$`$REG_SZ*18`, %rsp
and \$-256,%rsp
mov %rax,`$REG_SZ*17`(%rsp) # original %rsp
+.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8
.Lbody_avx2:
lea K_XX_XX(%rip),$Tbl
shr \$1,$num
.Lbody_avx2:
lea K_XX_XX(%rip),$Tbl
shr \$1,$num
@@
-1272,7
+1316,8
@@
$code.=<<___;
#jnz .Loop_grande_avx2
.Ldone_avx2:
#jnz .Loop_grande_avx2
.Ldone_avx2:
- mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp
+ mov `$REG_SZ*17`(%rsp),%rax # original %rsp
+.cfi_def_cfa %rax,8
vzeroupper
___
$code.=<<___ if ($win64);
vzeroupper
___
$code.=<<___ if ($win64);
@@
-1289,14
+1334,22
@@
$code.=<<___ if ($win64);
___
$code.=<<___;
mov -48(%rax),%r15
___
$code.=<<___;
mov -48(%rax),%r15
+.cfi_restore %r15
mov -40(%rax),%r14
mov -40(%rax),%r14
+.cfi_restore %r14
mov -32(%rax),%r13
mov -32(%rax),%r13
+.cfi_restore %r13
mov -24(%rax),%r12
mov -24(%rax),%r12
+.cfi_restore %r12
mov -16(%rax),%rbp
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp
lea (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
ret
.Lepilogue_avx2:
ret
+.cfi_endproc
.size sha1_multi_block_avx2,.-sha1_multi_block_avx2
___
} }}}
.size sha1_multi_block_avx2,.-sha1_multi_block_avx2
___
} }}}