crypto/x86_64cpuid.pl: move extended feature detection upwards.
author Andy Polyakov <appro@openssl.org>
Sun, 5 Mar 2017 19:38:36 +0000 (20:38 +0100)
committer Andy Polyakov <appro@openssl.org>
Tue, 7 Mar 2017 10:19:20 +0000 (11:19 +0100)
Extended feature flags were not pulled on AMD processors; as a result, a
number of extensions were effectively masked on Ryzen. This should have
been reported for Excavator, since it implements the AVX2 extension, but
apparently nobody noticed or cared...

Reviewed-by: Rich Salz <rsalz@openssl.org>
(cherry picked from commit f8418d87e191e46b81e1b9548326ab2876fa0907)

crypto/x86_64cpuid.pl

index 6cb152148b5b6eae2d61da23e18dd442e3edfff5..21d48ee6f9b2b025df649011ddfede439beb0985 100644 (file)
@@ -70,6 +70,16 @@ OPENSSL_ia32_cpuid:
        cpuid
        mov     %eax,%r11d              # max value for standard query level
 
+       cmp     \$7,%eax
+       jb      .Lno_extended_info
+
+       mov     \$7,%eax
+       xor     %ecx,%ecx
+       cpuid
+       mov     %ebx,8(%rdi)
+
+.Lno_extended_info:
+
        xor     %eax,%eax
        cmp     \$0x756e6547,%ebx       # "Genu"
        setne   %al
@@ -134,14 +144,6 @@ OPENSSL_ia32_cpuid:
        shr     \$14,%r10d
        and     \$0xfff,%r10d           # number of cores -1 per L1D
 
-       cmp     \$7,%r11d
-       jb      .Lnocacheinfo
-
-       mov     \$7,%eax
-       xor     %ecx,%ecx
-       cpuid
-       mov     %ebx,8(%rdi)
-
 .Lnocacheinfo:
        mov     \$1,%eax
        cpuid