Remove trailing whitespace from some files.

[openssl.git] / crypto / sha / asm / sha512-586.pl
diff --git a/crypto/sha/asm/sha512-586.pl b/crypto/sha/asm/sha512-586.pl

index 77a5c3113b3b0dbbac87f081f792b878dc4154f1..94cc0114f88a40d2843e018780cb6c07b3615f2c 100644 (file)
--- a/crypto/sha/asm/sha512-586.pl
+++ b/crypto/sha/asm/sha512-586.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
  #
  # ====================================================================
  # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -28,6 +35,7 @@
  # Bulldozer    121     -       50      14.0    13.5
  # VIA Nano     91      -       52      33      14.7
  # Atom         126     -       68      48(***) 14.7
+# Silvermont   97      -       58      42(***) 17.5
  #
  # (*)  whichever best applicable.
  # (**) x86_64 assembler performance is presented for reference
@@ -36,7 +44,7 @@
  #
  # IALU code-path is optimized for elder Pentiums. On vanilla Pentium
  # performance improvement over compiler generated code reaches ~60%,
-# while on PIII - ~35%. On newer µ-archs improvement varies from 15%
+# while on PIII - ~35%. On newer µ-archs improvement varies from 15%
  # to 50%, but it's less important as they are expected to execute SSE2
  # code-path, which is commonly ~2-3x faster [than compiler generated
  # code]. SSE2 code-path is as fast as original sha512-sse2.pl, even
@@ -49,6 +57,9 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
  push(@INC,"${dir}","${dir}../../perlasm");
  require "x86asm.pl";
  
+$output=pop;
+open STDOUT,">$output";
+
  &asm_init($ARGV[0],"sha512-586.pl",$ARGV[$#ARGV] eq "386");
  
  $sse2=0;
@@ -372,7 +383,7 @@ if ($sse2) {
  
  &set_label("16_79_sse2",16);
      for ($j=0;$j<2;$j++) {                     # 2x unroll
-       #&movq  ("mm7",&QWP(8*(9+16-1),"esp")); # prefetched in BODY_00_15 
+       #&movq  ("mm7",&QWP(8*(9+16-1),"esp")); # prefetched in BODY_00_15
         &movq   ("mm5",&QWP(8*(9+16-14),"esp"));
         &movq   ("mm1","mm7");
         &psrlq  ("mm7",1);
@@ -533,7 +544,7 @@ sub BODY_00_15_ssse3 {              # "phase-less" copy of BODY_00_15_sse2
          '&psllq($E,4)',
         '&pxor  ("mm3",$E)',                            # T1=Sigma1_512(e)
  
-        '&movq ($E,&DWP(8*($i+3)%64,"esp"))',          # e = load d, e in next round
+        '&movq ($E,&QWP(8*($i+3)%64,"esp"))',          # e = load d, e in next round
         '&paddq ("mm3","mm7")',                         # T1+=X[i]
          '&movq ("mm5",$A)',                            # %mm5 is sliding right
          '&psrlq("mm5",28)',
@@ -908,3 +919,5 @@ sub BODY_00_15_ssse3 {              # "phase-less" copy of BODY_00_15_sse2
  &asciz("SHA512 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
  
  &asm_finish();
+
+close STDOUT;