crypto/x86_64cpuid.pl: fix AVX512 capability masking.
author Andy Polyakov <appro@openssl.org>
Wed, 22 Nov 2017 19:48:44 +0000 (20:48 +0100)
committer Andy Polyakov <appro@openssl.org>
Thu, 23 Nov 2017 20:05:44 +0000 (21:05 +0100)
Originally it was thought that it was possible to use AVX512VL+BW
instructions with XMM and YMM registers without the kernel enabling
ZMM support, but that turned out to be a wrong assumption.

Reviewed-by: Rich Salz <rsalz@openssl.org>
crypto/x86_64cpuid.pl

index 7c8952e..d30928e 100644 (file)
@@ -197,10 +197,11 @@ OPENSSL_ia32_cpuid:
        and     \$0xe6,%eax             # isolate XMM, YMM and ZMM state support
        cmp     \$0xe6,%eax
        je      .Ldone
-       andl    \$0xfffeffff,8(%rdi)    # clear AVX512F, ~(1<<16)
-                                       # note that we don't touch other AVX512
-                                       # extensions, because they can be used
-                                       # with YMM (without opmasking though)
+       andl    \$0x3fdeffff,8(%rdi)    # ~(1<<31|1<<30|1<<21|1<<16)
+                                       # clear AVX512F+BW+VL+IFMA, all of
+                                       # them are EVEX-encoded, which requires
+                                       # ZMM state support even if one uses
+                                       # only XMM and YMM :-(
        and     \$6,%eax                # isolate XMM and YMM state support
        cmp     \$6,%eax
        je      .Ldone