Expand OPENSSL_ia32cap_P to 64 bits. It might appear controversial, because
authorAndy Polyakov <appro@openssl.org>
Tue, 28 Jun 2011 13:31:58 +0000 (13:31 +0000)
committerAndy Polyakov <appro@openssl.org>
Tue, 28 Jun 2011 13:31:58 +0000 (13:31 +0000)
such an operation can be considered as breaking binary compatibility. However!
OPENSSL_ia32cap_P is accessed by applications through the pointer returned by
OPENSSL_ia32cap_loc(), and such a change of the *internal* OPENSSL_ia32cap_P
declaration is possible specifically on little-endian platforms, such as
the x86[_64] ones in question. In addition, if a 32-bit application calls
OPENSSL_ia32cap_loc(), the call clears the upper half of the capability vector,
maintaining the illusion that it's still 32 bits wide.

crypto/cryptlib.c
crypto/cryptlib.h
crypto/perlasm/x86gas.pl
crypto/perlasm/x86masm.pl
crypto/perlasm/x86nasm.pl
crypto/whrlpool/wp_block.c

index 072b341..8384b5d 100644 (file)
@@ -665,28 +665,49 @@ const char *CRYPTO_get_lock_name(int type)
        defined(__INTEL__) || \
        defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
 
-unsigned long  OPENSSL_ia32cap_P=0;
-unsigned long *OPENSSL_ia32cap_loc(void) { return &OPENSSL_ia32cap_P; }
+unsigned int  OPENSSL_ia32cap_P[2];
+unsigned long *OPENSSL_ia32cap_loc(void)
+{   if (sizeof(long)==4)
+       /*
+        * If 32-bit application pulls address of OPENSSL_ia32cap_P[0]
+        * clear second element to maintain the illusion that vector
+        * is 32-bit.
+        */
+       OPENSSL_ia32cap_P[1]=0;
+    return (unsigned long *)OPENSSL_ia32cap_P;
+}
 
 #if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY)
 #define OPENSSL_CPUID_SETUP
+#if defined(_WIN32)
+typedef unsigned __int64 IA32CAP;
+#else
+typedef unsigned long long IA32CAP;
+#endif
 void OPENSSL_cpuid_setup(void)
 { static int trigger=0;
-  unsigned long OPENSSL_ia32_cpuid(void);
+  IA32CAP OPENSSL_ia32_cpuid(void);
+  IA32CAP vec;
   char *env;
 
     if (trigger)       return;
 
     trigger=1;
     if ((env=getenv("OPENSSL_ia32cap")))
-       OPENSSL_ia32cap_P = strtoul(env,NULL,0)|(1<<10);
+#if defined(_WIN32)
+    {  if (!sscanf(env,"%I64i",&vec)) vec = strtoul(env,NULL,0);   }
+#else
+       vec = strtoull(env,NULL,0);
+#endif
     else
-       OPENSSL_ia32cap_P = OPENSSL_ia32_cpuid()|(1<<10);
+       vec = OPENSSL_ia32_cpuid();
     /*
      * |(1<<10) sets a reserved bit to signal that variable
      * was initialized already... This is to avoid interference
      * with cpuid snippets in ELF .init segment.
      */
+    OPENSSL_ia32cap_P[0] = (unsigned int)vec|(1<<10);
+    OPENSSL_ia32cap_P[1] = (unsigned int)(vec>>32);
 }
 #endif
 
index fc249c5..1761f6b 100644 (file)
@@ -99,7 +99,7 @@ extern "C" {
 #define HEX_SIZE(type)         (sizeof(type)*2)
 
 void OPENSSL_cpuid_setup(void);
-extern unsigned long OPENSSL_ia32cap_P;
+extern unsigned int OPENSSL_ia32cap_P[];
 void OPENSSL_showfatal(const char *,...);
 void *OPENSSL_stderr(void);
 extern int OPENSSL_NONPIC_relocated;
index 6eab727..b470507 100644 (file)
@@ -150,7 +150,7 @@ sub ::public_label
 
 sub ::file_end
 {   if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) {
-       my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,4";
+       my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,8";
        if ($::elf)     { push (@out,"$tmp,4\n"); }
        else            { push (@out,"$tmp\n"); }
     }
index 3d50e4a..03e7ba6 100644 (file)
@@ -129,7 +129,7 @@ ___
     if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
     {  my $comm=<<___;
 .bss   SEGMENT 'BSS'
-COMM   ${nmdecor}OPENSSL_ia32cap_P:DWORD
+COMM   ${nmdecor}OPENSSL_ia32cap_P:QWORD
 .bss   ENDS
 ___
        # comment out OPENSSL_ia32cap_P declarations
index ce2bed9..1a38458 100644 (file)
@@ -114,7 +114,7 @@ sub ::file_end
 {   if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
     {  my $comm=<<___;
 ${drdecor}segment      .bss
-${drdecor}common       ${nmdecor}OPENSSL_ia32cap_P 4
+${drdecor}common       ${nmdecor}OPENSSL_ia32cap_P 8
 ___
        # comment out OPENSSL_ia32cap_P declarations
        grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out;
index 221f6cc..824ed18 100644 (file)
@@ -68,9 +68,9 @@ typedef unsigned long long    u64;
                                           CPUs this is actually faster! */
 #    endif
 #    define GO_FOR_MMX(ctx,inp,num)    do {                    \
-       extern unsigned long OPENSSL_ia32cap_P;                 \
+       extern unsigned int OPENSSL_ia32cap_P[];                \
        void whirlpool_block_mmx(void *,const void *,size_t);   \
-       if (!(OPENSSL_ia32cap_P & (1<<23)))     break;          \
+       if (!(OPENSSL_ia32cap_P[0] & (1<<23)))  break;          \
         whirlpool_block_mmx(ctx->H.c,inp,num); return;         \
                                        } while (0)
 #  endif