sha[1|512]-x86_64.pl: fix logical errors with $shaext=0.
[openssl.git] / crypto / sha / asm / sha1-x86_64.pl
index ea288c15d5e9e4fa4b8d9dac317f1260235fefcd..9bb6b498190fdf81f7de9ca1d6429191915ca59b 100755 (executable)
@@ -107,6 +107,13 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
        $avx = ($1>=10) + ($1>=11);
 }
 
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([2-9]\.[0-9]+)/) {
+       $avx = ($2>=3.0) + ($2>3.0);    # 0 below 3.0, 1 at exactly 3.0, 2 above; NOTE(review): pattern only matches single-digit majors 2-9
+}
+
+$shaext=1;     ### set to zero if compiling for 1.0.1
+$avx=1         if (!$shaext && $avx);  # with $shaext off, any non-zero $avx level is forced down to 1
+
 open OUT,"| \"$^X\" $xlate $flavour $output";
 *STDOUT=*OUT;
 
@@ -245,7 +252,8 @@ sha1_block_data_order:
        mov     OPENSSL_ia32cap_P+8(%rip),%r10d
        test    \$`1<<9`,%r8d           # check SSSE3 bit
        jz      .Lialu
-
+___
+$code.=<<___ if ($shaext);
        test    \$`1<<29`,%r10d         # check SHA bit 
        jnz     _shaext_shortcut
 ___
@@ -321,7 +329,7 @@ $code.=<<___;
        ret
 .size  sha1_block_data_order,.-sha1_block_data_order
 ___
-{{{
+if ($shaext) {{{
 ######################################################################
 # Intel SHA Extensions implementation of SHA1 update function.
 #
@@ -1827,7 +1835,9 @@ se_handler:
 
        jmp     .Lcommon_seh_tail
 .size  se_handler,.-se_handler
+___
 
+$code.=<<___ if ($shaext);
 .type  shaext_handler,\@abi-omnipotent
 .align 16
 shaext_handler:
@@ -1860,7 +1870,9 @@ shaext_handler:
 
        jmp     .Lcommon_seh_tail
 .size  shaext_handler,.-shaext_handler
+___
 
+$code.=<<___;
 .type  ssse3_handler,\@abi-omnipotent
 .align 16
 ssse3_handler:
@@ -1956,9 +1968,13 @@ ssse3_handler:
        .rva    .LSEH_begin_sha1_block_data_order
        .rva    .LSEH_end_sha1_block_data_order
        .rva    .LSEH_info_sha1_block_data_order
+___
+$code.=<<___ if ($shaext);
        .rva    .LSEH_begin_sha1_block_data_order_shaext
        .rva    .LSEH_end_sha1_block_data_order_shaext
        .rva    .LSEH_info_sha1_block_data_order_shaext
+___
+$code.=<<___;
        .rva    .LSEH_begin_sha1_block_data_order_ssse3
        .rva    .LSEH_end_sha1_block_data_order_ssse3
        .rva    .LSEH_info_sha1_block_data_order_ssse3
@@ -1979,9 +1995,13 @@ $code.=<<___;
 .LSEH_info_sha1_block_data_order:
        .byte   9,0,0,0
        .rva    se_handler
+___
+$code.=<<___ if ($shaext);
 .LSEH_info_sha1_block_data_order_shaext:
        .byte   9,0,0,0
        .rva    shaext_handler
+___
+$code.=<<___;
 .LSEH_info_sha1_block_data_order_ssse3:
        .byte   9,0,0,0
        .rva    ssse3_handler
@@ -2022,8 +2042,12 @@ sub sha1op38 {
                "sha1msg1"  => 0xc9,
                "sha1msg2"  => 0xca     );
 
-    if (defined($opcodelet{$instr}) && @_[0] =~ /%xmm([0-7]),\s*%xmm([0-7])/) {
+    if (defined($opcodelet{$instr}) && @_[0] =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) {
       my @opcode=(0x0f,0x38);
+      my $rex=0;
+       $rex|=0x04                      if ($2>=8);
+       $rex|=0x01                      if ($1>=8);
+       unshift @opcode,0x40|$rex       if ($rex);
        push @opcode,$opcodelet{$instr};
        push @opcode,0xc0|($1&7)|(($2&7)<<3);           # ModR/M
        return ".byte\t".join(',',@opcode);