X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fx86_64cpuid.pl;h=db5aa4aa361d4f6cf46a378bb45bff80ecdd464c;hp=40d42135bbc8747891b0794b9ccfe07a47c66652;hb=c3586512184b4ca6c682c479323ac282ab30298e;hpb=ddc20d4da9d770afeace2b4111d5d39d00e6c6b4 diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl index 40d42135bb..db5aa4aa36 100644 --- a/crypto/x86_64cpuid.pl +++ b/crypto/x86_64cpuid.pl @@ -7,7 +7,12 @@ if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output"; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; ($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order ("%rdi","%rsi","%rdx","%rcx"); # Unix order @@ -19,7 +24,7 @@ print<<___; call OPENSSL_cpuid_setup .hidden OPENSSL_ia32cap_P -.comm OPENSSL_ia32cap_P,8 +.comm OPENSSL_ia32cap_P,16,4 .text @@ -48,12 +53,13 @@ OPENSSL_rdtsc: .size OPENSSL_rdtsc,.-OPENSSL_rdtsc .globl OPENSSL_ia32_cpuid -.type OPENSSL_ia32_cpuid,\@abi-omnipotent +.type OPENSSL_ia32_cpuid,\@function,1 .align 16 OPENSSL_ia32_cpuid: mov %rbx,%r8 # save %rbx xor %eax,%eax + mov %eax,8(%rdi) # clear 3rd word cpuid mov %eax,%r11d # max value for standard query level @@ -121,16 +127,25 @@ OPENSSL_ia32_cpuid: shr \$14,%r10d and \$0xfff,%r10d # number of cores -1 per L1D + cmp \$7,%r11d + jb .Lnocacheinfo + + mov \$7,%eax + xor %ecx,%ecx + cpuid + mov %ebx,8(%rdi) + .Lnocacheinfo: mov \$1,%eax cpuid + and \$0xbfefffff,%edx # force reserved bits to 0 cmp \$0,%r9d jne .Lnotintel - or \$0x00100000,%edx # use reserved 20th bit to engage RC4_CHAR + or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs and \$15,%ah cmp \$15,%ah # examine Family ID - je .Lnotintel - or \$0x40000000,%edx # use reserved bit to skip unrolled loop + jne .Lnotintel + or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR .Lnotintel: bt \$28,%edx # test hyper-threading bit jnc .Lgeneric @@ -146,12 +161,10 @@ OPENSSL_ia32_cpuid: .Lgeneric: and \$0x00000800,%r9d # isolate AMD XOP flag and \$0xfffff7ff,%ecx - or %r9d,%ecx # merge AMD XOP flag + or %ecx,%r9d # merge AMD XOP flag - shl \$32,%rcx - mov %edx,%ebx - or %rcx,%rbx # compose capability vector in %rbx - bt \$27+32,%rcx # check OSXSAVE bit + mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx + bt \$27,%r9d # check OSXSAVE bit jnc .Lclear_avx xor %ecx,%ecx # XCR0 .byte 0x0f,0x01,0xd0 # xgetbv @@ -160,11 +173,13 @@ OPENSSL_ia32_cpuid: je .Ldone .Lclear_avx: mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) - shl \$32,%rax - and %rax,%rbx # clear AVX, FMA and AMD XOP bits + and %eax,%r9d # clear AVX, FMA and AMD XOP bits + andl \$0xffffffdf,8(%rdi) # cleax AVX2, ~(1<<5) .Ldone: - mov %rbx,%rax + shl \$32,%r9 + mov %r10d,%eax mov %r8,%rbx # restore %rbx + or %r9,%rax ret .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid @@ -349,4 +364,36 @@ OPENSSL_instrument_bus2: ___ } +print<<___; +.globl OPENSSL_ia32_rdrand +.type OPENSSL_ia32_rdrand,\@abi-omnipotent +.align 16 +OPENSSL_ia32_rdrand: + mov \$8,%ecx +.Loop_rdrand: + rdrand %rax + jc .Lbreak_rdrand + loop .Loop_rdrand +.Lbreak_rdrand: + cmp \$0,%rax + cmove %rcx,%rax + ret +.size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand + +.globl OPENSSL_ia32_rdseed +.type OPENSSL_ia32_rdseed,\@abi-omnipotent +.align 16 +OPENSSL_ia32_rdseed: + mov \$8,%ecx +.Loop_rdseed: + rdseed %rax + jc .Lbreak_rdseed + loop .Loop_rdseed +.Lbreak_rdseed: + cmp \$0,%rax + cmove %rcx,%rax + ret +.size OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed +___ + close STDOUT; # flush