sparcv9cap.c: addendum to recent EC optimizations.
[openssl.git] / crypto / sparcv9cap.c
index 3ec2340241af11a1d668d70f5ea7ea304438587b..b961cbe3fa114af4050226236f3ec2e10378b930 100644 (file)
@@ -1,6 +1,8 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <setjmp.h>
+#include <signal.h>
 #include <sys/time.h>
 #include <openssl/bn.h>
 
 #include <sys/time.h>
 #include <openssl/bn.h>
 
@@ -9,6 +11,8 @@
 #define SPARCV9_VIS1           (1<<2)
 #define SPARCV9_VIS2           (1<<3)  /* reserved */
 #define SPARCV9_FMADD          (1<<4)  /* reserved for SPARC64 V */
 #define SPARCV9_VIS1           (1<<2)
 #define SPARCV9_VIS2           (1<<3)  /* reserved */
 #define SPARCV9_FMADD          (1<<4)  /* reserved for SPARC64 V */
+#define SPARCV9_BLK            (1<<5)  /* VIS1 block copy */
+
 static int OPENSSL_sparcv9cap_P=SPARCV9_TICK_PRIVILEGED;
 
 int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num)
 static int OPENSSL_sparcv9cap_P=SPARCV9_TICK_PRIVILEGED;
 
 int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num)
@@ -16,17 +20,24 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U
        int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
        int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
 
        int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
        int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
 
-       if ((OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) ==
+       if (num>=8 && !(num&1) &&
+           (OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) ==
                (SPARCV9_PREFER_FPU|SPARCV9_VIS1))
                return bn_mul_mont_fpu(rp,ap,bp,np,n0,num);
        else
                return bn_mul_mont_int(rp,ap,bp,np,n0,num);
        }
 
                (SPARCV9_PREFER_FPU|SPARCV9_VIS1))
                return bn_mul_mont_fpu(rp,ap,bp,np,n0,num);
        else
                return bn_mul_mont_int(rp,ap,bp,np,n0,num);
        }
 
+/*
+ * Prototypes for the low-level probe and instrumentation routines
+ * called from this file; their definitions are not part of this file.
+ * The three-argument bus routine is spelled _sparcv9_... to match its
+ * call site in OPENSSL_instrument_bus2 and the other prototypes here.
+ */
+unsigned long  _sparcv9_rdtick(void);
+void           _sparcv9_vis1_probe(void);
+unsigned long  _sparcv9_vis1_instrument(void);
+void           _sparcv9_vis2_probe(void);
+void           _sparcv9_fmadd_probe(void);
+size_t                 _sparcv9_vis1_instrument_bus(unsigned int *,size_t);
+size_t         _sparcv9_vis1_instrument_bus2(unsigned int *,size_t,size_t);
+
 unsigned long OPENSSL_rdtsc(void)
        {
 unsigned long OPENSSL_rdtsc(void)
        {
-       unsigned long _sparcv9_rdtick(void);
-
        if (OPENSSL_sparcv9cap_P&SPARCV9_TICK_PRIVILEGED)
 #if defined(__sun) && defined(__SVR4)
                return gethrtime();
        if (OPENSSL_sparcv9cap_P&SPARCV9_TICK_PRIVILEGED)
 #if defined(__sun) && defined(__SVR4)
                return gethrtime();
@@ -37,10 +48,32 @@ unsigned long OPENSSL_rdtsc(void)
                return _sparcv9_rdtick();
        }
 
                return _sparcv9_rdtick();
        }
 
-#if defined(__sun) && defined(__SVR4)
+/*
+ * Forward to _sparcv9_vis1_instrument_bus(out,cnt) when the capability
+ * word has SPARCV9_BLK set and SPARCV9_TICK_PRIVILEGED clear.
+ * Returns that routine's result, or 0 when the condition is not met.
+ */
+size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt)
+       {
+       /* '==' binds tighter than '&', so the masked value must be
+        * parenthesized; without it the flags are ANDed with the
+        * result of comparing two constants. */
+       if ((OPENSSL_sparcv9cap_P&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) ==
+                       SPARCV9_BLK)
+               return _sparcv9_vis1_instrument_bus(out,cnt);
+       else
+               return 0;
+       }
+
+/*
+ * Forward to _sparcv9_vis1_instrument_bus2(out,cnt,max) when the
+ * capability word has SPARCV9_BLK set and SPARCV9_TICK_PRIVILEGED clear.
+ * Returns that routine's result, or 0 when the condition is not met.
+ */
+size_t OPENSSL_instrument_bus2(unsigned int *out,size_t cnt,size_t max)
+       {
+       /* '==' binds tighter than '&', so the masked value must be
+        * parenthesized; without it the flags are ANDed with the
+        * result of comparing two constants. */
+       if ((OPENSSL_sparcv9cap_P&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) ==
+                       SPARCV9_BLK)
+               return _sparcv9_vis1_instrument_bus2(out,cnt,max);
+       else
+               return 0;
+       }
 
 
+#if 0 && defined(__sun) && defined(__SVR4)
+/* This code path is disabled, because of incompatibility of
+ * libdevinfo.so.1 and libmalloc.so.1 (see below for details)
+ */
+#include <malloc.h>
 #include <dlfcn.h>
 #include <libdevinfo.h>
 #include <dlfcn.h>
 #include <libdevinfo.h>
+#include <sys/systeminfo.h>
 
 typedef di_node_t (*di_init_t)(const char *,uint_t);
 typedef void      (*di_fini_t)(di_node_t);
 
 typedef di_node_t (*di_init_t)(const char *,uint_t);
 typedef void      (*di_fini_t)(di_node_t);
@@ -79,7 +112,7 @@ static int walk_nodename(di_node_t node, di_node_name_t di_node_name)
 void OPENSSL_cpuid_setup(void)
        {
        void *h;
 void OPENSSL_cpuid_setup(void)
        {
        void *h;
-       char *e;
+       char *e,si[256];
        static int trigger=0;
 
        if (trigger) return;
        static int trigger=0;
 
        if (trigger) return;
@@ -91,6 +124,39 @@ void OPENSSL_cpuid_setup(void)
                return;
                }
 
                return;
                }
 
+       if (sysinfo(SI_MACHINE,si,sizeof(si))>0)
+               {
+               if (strcmp(si,"sun4v"))
+                       /* FPU is preferred for all CPUs, but US-T1/2 */
+                       OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU;
+               }
+
+       if (sysinfo(SI_ISALIST,si,sizeof(si))>0)
+               {
+               if (strstr(si,"+vis"))
+                       OPENSSL_sparcv9cap_P |= SPARCV9_VIS1|SPARCV9_BLK;
+               if (strstr(si,"+vis2"))
+                       {
+                       OPENSSL_sparcv9cap_P |= SPARCV9_VIS2;
+                       OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
+                       return;
+                       }
+               }
+#ifdef M_KEEP
+       /*
+        * Solaris libdevinfo.so.1 is effectively incompatible with
+        * libmalloc.so.1. Specifically, if application is linked with
+        * -lmalloc, it crashes upon startup with SIGSEGV in
+        * free(3LIBMALLOC) called by di_fini. Prior call to
+        * mallopt(M_KEEP,0) somehow helps... But not always...
+        */
+       if ((h = dlopen(NULL,RTLD_LAZY)))
+               {
+               union { void *p; int (*f)(int,int); } sym;
+               if ((sym.p = dlsym(h,"mallopt"))) (*sym.f)(M_KEEP,0);
+               dlclose(h);
+               }
+#endif
        if ((h = dlopen("libdevinfo.so.1",RTLD_LAZY))) do
                {
                di_init_t       di_init;
        if ((h = dlopen("libdevinfo.so.1",RTLD_LAZY))) do
                {
                di_init_t       di_init;
@@ -117,18 +183,76 @@ void OPENSSL_cpuid_setup(void)
 
 #else
 
 
 #else
 
+/* Jump buffer shared between the probes and their signal handler. */
+static sigjmp_buf common_jmp;
+
+/*
+ * Signal handler installed around the capability probes: jumps back to
+ * the sigsetjmp(common_jmp,...) call site, passing sig as its value.
+ */
+static void common_handler(int sig)
+       {
+       siglongjmp(common_jmp,sig);
+       }
+
 void OPENSSL_cpuid_setup(void)
        {
        char *e;
 void OPENSSL_cpuid_setup(void)
        {
        char *e;
+       struct sigaction        common_act,ill_oact,bus_oact;
+       sigset_t                all_masked,oset;
+       static int trigger=0;
+
+       if (trigger) return;
+       trigger=1;
  
        if ((e=getenv("OPENSSL_sparcv9cap")))
                {
  
        if ((e=getenv("OPENSSL_sparcv9cap")))
                {
-               OPENSSL_sparcv9cap_P=strtoul(env,NULL,0);
+               OPENSSL_sparcv9cap_P=strtoul(e,NULL,0);
                return;
                }
 
                return;
                }
 
-       /* For now we assume that the rest supports UltraSPARC-I* only */
-       OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU|SPARCV9_VIS1;
+       /* Initial value, fits UltraSPARC-I&II... */
+       OPENSSL_sparcv9cap_P = SPARCV9_PREFER_FPU|SPARCV9_TICK_PRIVILEGED;
+
+       sigfillset(&all_masked);
+       sigdelset(&all_masked,SIGILL);
+       sigdelset(&all_masked,SIGTRAP);
+#ifdef SIGEMT
+       sigdelset(&all_masked,SIGEMT);
+#endif
+       sigdelset(&all_masked,SIGFPE);
+       sigdelset(&all_masked,SIGBUS);
+       sigdelset(&all_masked,SIGSEGV);
+       sigprocmask(SIG_SETMASK,&all_masked,&oset);
+
+       memset(&common_act,0,sizeof(common_act));
+       common_act.sa_handler = common_handler;
+       common_act.sa_mask    = all_masked;
+
+       sigaction(SIGILL,&common_act,&ill_oact);
+       sigaction(SIGBUS,&common_act,&bus_oact);/* T1 fails 16-bit ldda [on Linux] */
+
+       if (sigsetjmp(common_jmp,1) == 0)
+               {
+               _sparcv9_rdtick();
+               OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
+               }
+
+       if (sigsetjmp(common_jmp,1) == 0)
+               {
+               _sparcv9_vis1_probe();
+               OPENSSL_sparcv9cap_P |= SPARCV9_VIS1|SPARCV9_BLK;
+               /* detect UltraSPARC-Tx, see sparccpud.S for details... */
+               if (_sparcv9_vis1_instrument() >= 12)
+                       OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU);
+               else
+                       {
+                       _sparcv9_vis2_probe();
+                       OPENSSL_sparcv9cap_P |= SPARCV9_VIS2;
+                       }
+               }
+
+       if (sigsetjmp(common_jmp,1) == 0)
+               {
+               _sparcv9_fmadd_probe();
+               OPENSSL_sparcv9cap_P |= SPARCV9_FMADD;
+               }
+
+       sigaction(SIGBUS,&bus_oact,NULL);
+       sigaction(SIGILL,&ill_oact,NULL);
+
+       sigprocmask(SIG_SETMASK,&oset,NULL);
        }
 
 #endif
        }
 
 #endif