RC4_set_key for x86_64 and Core2 optimization.
[openssl.git] / crypto / x86_64cpuid.pl
index 4d88ad191b37a3c8d878540fcc9987b1e6d59903..f9f2827636a58e25eb470e0366105f92baf862a3 100644 (file)
@@ -48,8 +48,37 @@ OPENSSL_wipe_cpu     ENDP
 
 OPENSSL_ia32_cpuid     PROC
        mov     r8,rbx
+
+       xor     eax,eax                 ; eax = 0 selects CPUID leaf 0
+       cpuid
+       xor     eax,eax                 ; clear eax so setne al yields 0 or 1
+       cmp     ebx,0756e6547h          ; "Genu"
+       setne   al
+       mov     r9d,eax                 ; r9d = 1 on mismatch, 0 on match
+       cmp     edx,049656e69h          ; "ineI"
+       setne   al
+       or      r9d,eax                 ; accumulate mismatch
+       cmp     ecx,06c65746eh          ; "ntel"
+       setne   al
+       or      r9d,eax                 ; r9d = 0 only if all three words matched
+
        mov     eax,1
        cpuid
+       bt      edx,28                  ; test hyper-threading bit
+       jnc     \$Ldone                 ; bit clear: nothing to adjust
+       cmp     r9,0                    ; r9 = 0 iff vendor is "GenuineIntel"
+       jne     \$Lnotintel
+       or      edx,000100000h          ; 1<<20: use reserved bit to engage RC4_CHAR
+       and     ah,15                   ; keep low nibble of ah (eax bits 11:8)
+       cmp     ah,15                   ; examine Family ID
+       je      \$Lnotintel
+       or      edx,040000000h          ; 1<<30: use reserved bit to skip unrolled loop
+\$Lnotintel:
+       shr     ebx,16                  ; bl = bits 23:16 of leaf-1 ebx
+       cmp     bl,1                    ; see if cache is shared
+       ja      \$Ldone                 ; above 1: leave bit 28 set
+       and     edx,0efffffffh          ; ~(1<<28): clear hyper-threading bit
+\$Ldone:
        shl     rcx,32
        mov     eax,edx
        mov     rbx,r8
@@ -124,8 +153,37 @@ OPENSSL_wipe_cpu:
 .align 16
 OPENSSL_ia32_cpuid:
        movq    %rbx,%r8
+
+       xor     %eax,%eax               # eax = 0 selects CPUID leaf 0
+       cpuid
+       xor     %eax,%eax               # clear eax so setne al yields 0 or 1
+       cmp     \$0x756e6547,%ebx       # "Genu"
+       setne   %al
+       mov     %eax,%r9d               # r9d = 1 on mismatch, 0 on match
+       cmp     \$0x49656e69,%edx       # "ineI"
+       setne   %al
+       or      %eax,%r9d               # accumulate mismatch
+       cmp     \$0x6c65746e,%ecx       # "ntel"
+       setne   %al
+       or      %eax,%r9d               # r9d = 0 only if all three words matched
+
        movl    \$1,%eax
        cpuid
+       bt      \$28,%edx               # test hyper-threading bit
+       jnc     .Ldone                  # bit clear: nothing to adjust
+       cmp     \$0,%r9                 # r9 = 0 iff vendor is "GenuineIntel"
+       jne     .Lnotintel
+       or      \$1<<20,%edx            # use reserved bit to engage RC4_CHAR
+       and     \$15,%ah                # keep low nibble of ah (eax bits 11:8)
+       cmp     \$15,%ah                # examine Family ID
+       je      .Lnotintel
+       or      \$1<<30,%edx            # use reserved bit to skip unrolled loop
+.Lnotintel:
+       shr     \$16,%ebx               # bl = bits 23:16 of leaf-1 ebx
+       cmp     \$1,%bl                 # see if cache is shared
+       ja      .Ldone                  # above 1: leave bit 28 set
+       and     \$~(1<<28),%edx         # clear hyper-threading bit
+.Ldone:
        shlq    \$32,%rcx
        movl    %edx,%eax
        movq    %r8,%rbx