aesni-sha1-x86_64.pl: Atom-specific optimization.
author Andy Polyakov <appro@openssl.org>
Sat, 25 May 2013 17:08:39 +0000 (19:08 +0200)
committer Andy Polyakov <appro@openssl.org>
Sat, 25 May 2013 17:08:39 +0000 (19:08 +0200)
crypto/aes/asm/aesni-sha1-x86_64.pl

index ad17f35..031e19b 100644 (file)
@@ -52,9 +52,9 @@
 #
 #              AES-128-CBC     AES-192-CBC     AES-256-CBC
 # Westmere     1.31            1.55            1.80
-# Sandy Bridge 0.93            1.06            1.22
-# Ivy Bridge   0.92            1.06            1.21
-# Bulldozer    0.76            0.90            1.04
+# Sandy Bridge 0.74            0.91            1.09
+# Ivy Bridge   0.74            0.90            1.11
+# Bulldozer    0.70            0.85            0.99
 
 $flavour = shift;
 $output  = shift;
@@ -125,6 +125,13 @@ my $K_XX_XX="%r11";
 my ($iv,$in,$rndkey0)=map("%xmm$_",(11..13));
 my @rndkey=("%xmm14","%xmm15");
 
+if (1) {
+    @X=map("%xmm$_",(4..11));
+    @Tx=map("%xmm$_",(12..14));
+    ($iv,$in,$rndkey0)=map("%xmm$_",(2,3,15));
+    @rndkey=("%xmm0","%xmm1");
+}
+
 sub AUTOLOAD()         # thunk [simplified] 32-bit style perlasm
 { my $opcode = $AUTOLOAD; $opcode =~ s/.*:://;
   my $arg = pop;
@@ -671,9 +678,11 @@ my ($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9"
 my $Xi=4;
 my @X=map("%xmm$_",(4..7,0..3));
 my @Tx=map("%xmm$_",(8..10));
-my $Kx=$rndkey0;
 my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp");   # size optimization
 my @T=("%esi","%edi");
+my ($iv,$in,$rndkey0)=map("%xmm$_",(11..13));
+my @rndkey=("%xmm14","%xmm15");
+my $Kx=$rndkey0;
 
 my $_rol=sub { &shld(@_[0],@_) };
 my $_ror=sub { &shrd(@_[0],@_) };