aesni-sha256-x86_64.pl: fix crash on AMD Jaguar.

author Andy Polyakov <appro@openssl.org>

Wed, 4 Nov 2015 22:57:06 +0000 (23:57 +0100)

committer Andy Polyakov <appro@openssl.org>

Mon, 16 Nov 2015 12:06:10 +0000 (13:06 +0100)
author Andy Polyakov <appro@openssl.org>
Wed, 4 Nov 2015 22:57:06 +0000 (23:57 +0100)
committer Andy Polyakov <appro@openssl.org>
Mon, 16 Nov 2015 12:06:10 +0000 (13:06 +0100)
diff --git a/crypto/aes/asm/aesni-sha256-x86_64.pl b/crypto/aes/asm/aesni-sha256-x86_64.pl

index 74dad44408abde019d6299206116767124abf9f7..8a8199445b01a0aa228cbd17f476121b77061f28 100644 (file)
--- a/crypto/aes/asm/aesni-sha256-x86_64.pl
+++ b/crypto/aes/asm/aesni-sha256-x86_64.pl
@@ -140,11 +140,8 @@ $code.=<<___ if ($avx>1);
         je      ${func}_avx2
  ___
  $code.=<<___;
-       and     \$`1<<30`,%eax                  # mask "Intel CPU" bit
-       and     \$`1<<28|1<<9`,%r10d            # mask AVX+SSSE3 bits
-       or      %eax,%r10d
-       cmp     \$`1<<28|1<<9|1<<30`,%r10d
-       je      ${func}_avx
+       and     \$`1<<28`,%r10d                 # check for AVX
+       jnz     ${func}_avx
         ud2
  ___
                                                 }
diff --git a/crypto/evp/e_aes_cbc_hmac_sha256.c b/crypto/evp/e_aes_cbc_hmac_sha256.c

index 411c770868dfaff320be09c6bdc4b913331dfe80..63f6e48d434409fb63f20075340510e60bec51dd 100644 (file)
--- a/crypto/evp/e_aes_cbc_hmac_sha256.c
+++ b/crypto/evp/e_aes_cbc_hmac_sha256.c
@@ -498,7 +498,18 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx,
              iv = AES_BLOCK_SIZE;
  
  #  if defined(STITCHED_CALL)
+        /*
+         * The assembly stitch handles AVX-capable processors, but its
+         * performance is ~40% worse on AMD Jaguar, for unknown reasons.
+         * That processor supports AVX but not the AMD-specific XOP
+         * extension, which lets us identify it and avoid invoking the
+         * stitch. So after establishing that the current CPU supports
+         * AVX, we also require it to be either an XOP-capable
+         * (Bulldozer-based) AMD processor or a GenuineIntel one.
+         */
          if (OPENSSL_ia32cap_P[1] & (1 << (60 - 32)) && /* AVX? */
+            ((OPENSSL_ia32cap_P[1] & (1 << (43 - 32))) /* XOP? */
+             | (OPENSSL_ia32cap_P[0] & (1<<30))) &&    /* "Intel CPU"? */
              plen > (sha_off + iv) &&
              (blocks = (plen - (sha_off + iv)) / SHA256_CBLOCK)) {
              SHA256_Update(&key->md, in + iv, sha_off);
author	Andy Polyakov <appro@openssl.org>
	Wed, 4 Nov 2015 22:57:06 +0000 (23:57 +0100)
committer	Andy Polyakov <appro@openssl.org>
	Mon, 16 Nov 2015 12:06:10 +0000 (13:06 +0100)
crypto/aes/asm/aesni-sha256-x86_64.pl		patch | blob | history
crypto/evp/e_aes_cbc_hmac_sha256.c		patch | blob | history