X-Git-Url: https://git.openssl.org/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fsha%2Fasm%2Fsha1-mb-x86_64.pl;h=56e15292a78074ba5fd215aafc231a7988c83642;hp=099c803ebc41e76ef6c90aeb209d31fd1687e6c7;hb=b5cdec2feac6049418543216ac5da70395697839;hpb=b7f5503fa6e1feebec2ac12b8ddcb5b5672452a6 diff --git a/crypto/sha/asm/sha1-mb-x86_64.pl b/crypto/sha/asm/sha1-mb-x86_64.pl index 099c803ebc..56e15292a7 100644 --- a/crypto/sha/asm/sha1-mb-x86_64.pl +++ b/crypto/sha/asm/sha1-mb-x86_64.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov for the OpenSSL @@ -59,11 +66,11 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $avx = ($1>=10) + ($1>=11); } -if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) { +if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) { $avx = ($2>=3.0) + ($2>3.0); } -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; # void sha1_multi_block ( @@ -88,7 +95,7 @@ $K="%xmm15"; if (1) { # Atom-specific optimization aiming to eliminate pshufb with high - # registers [and thus get rid of 48 cycles accumulated penalty] + # registers [and thus get rid of 48 cycles accumulated penalty] @Xi=map("%xmm$_",(0..4)); ($tx,$t0,$t1,$t2,$t3)=map("%xmm$_",(5..9)); @V=($A,$B,$C,$D,$E)=map("%xmm$_",(10..14)); @@ -119,7 +126,7 @@ my $k=$i+2; # ... # $i==13: 14,15,15,15, # $i==14: 15 -# +# # Then at $i==15 Xupdate is applied one iteration in advance... $code.=<<___ if ($i==0); movd (@ptr[0]),@Xi[0] @@ -356,6 +363,7 @@ $code.=<<___; .type sha1_multi_block,\@function,3 .align 32 sha1_multi_block: +.cfi_startproc mov OPENSSL_ia32cap_P+4(%rip),%rcx bt \$61,%rcx # check SHA bit jc _shaext_shortcut @@ -366,8 +374,11 @@ $code.=<<___ if ($avx); ___ $code.=<<___; mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbx ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -386,6 +397,7 @@ $code.=<<___; sub \$`$REG_SZ*18`,%rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8 .Lbody: lea K_XX_XX(%rip),$Tbl lea `$REG_SZ*16`(%rsp),%rbx @@ -479,7 +491,8 @@ $code.=<<___; jnz .Loop_grande .Ldone: - mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp + mov `$REG_SZ*17`(%rsp),%rax # original %rsp +.cfi_def_cfa %rax,8 ___ $code.=<<___ if ($win64); movaps -0xb8(%rax),%xmm6 @@ -495,10 +508,14 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lepilogue: ret +.cfi_endproc .size sha1_multi_block,.-sha1_multi_block ___ {{{ @@ -510,10 +527,14 @@ $code.=<<___; .type sha1_multi_block_shaext,\@function,3 .align 32 sha1_multi_block_shaext: +.cfi_startproc _shaext_shortcut: mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -539,7 +560,7 @@ $code.=<<___; movdqa K_XX_XX+0x80(%rip),$BSWAP # byte-n-word swap .Loop_grande_shaext: - mov $num,`$REG_SZ*17+8`(%rsp) # orignal $num + mov $num,`$REG_SZ*17+8`(%rsp) # original $num xor $num,$num ___ for($i=0;$i<2;$i++) { @@ -749,10 +770,14 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lepilogue_shaext: ret +.cfi_endproc .size sha1_multi_block_shaext,.-sha1_multi_block_shaext ___ }}} @@ -995,6 +1020,7 @@ $code.=<<___; .type sha1_multi_block_avx,\@function,3 .align 32 sha1_multi_block_avx: +.cfi_startproc _avx_shortcut: ___ $code.=<<___ if ($avx>1); @@ -1009,8 +1035,11 @@ $code.=<<___ if ($avx>1); ___ $code.=<<___; mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -1029,6 +1058,7 @@ $code.=<<___; sub \$`$REG_SZ*18`, %rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8 .Lbody_avx: lea K_XX_XX(%rip),$Tbl lea `$REG_SZ*16`(%rsp),%rbx @@ -1117,7 +1147,8 @@ $code.=<<___; jnz .Loop_grande_avx .Ldone_avx: - mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp + mov `$REG_SZ*17`(%rsp),%rax # original %rsp +.cfi_def_cfa %rax,8 vzeroupper ___ $code.=<<___ if ($win64); @@ -1134,10 +1165,14 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lepilogue_avx: ret +.cfi_endproc .size sha1_multi_block_avx,.-sha1_multi_block_avx ___ @@ -1157,14 +1192,22 @@ $code.=<<___; .type sha1_multi_block_avx2,\@function,3 .align 32 sha1_multi_block_avx2: +.cfi_startproc _avx2_shortcut: mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -1183,6 +1226,7 @@ $code.=<<___; sub \$`$REG_SZ*18`, %rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8 .Lbody_avx2: lea K_XX_XX(%rip),$Tbl shr \$1,$num @@ -1272,7 +1316,8 @@ $code.=<<___; #jnz .Loop_grande_avx2 .Ldone_avx2: - mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp + mov `$REG_SZ*17`(%rsp),%rax # original %rsp +.cfi_def_cfa %rax,8 vzeroupper ___ $code.=<<___ if ($win64); @@ -1289,14 +1334,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lepilogue_avx2: ret +.cfi_endproc .size sha1_multi_block_avx2,.-sha1_multi_block_avx2 ___ } }}}