Kill unused macro and reimplement it for the single context in which it can be used
[openssl.git] / crypto / md32_common.h
index 0cdc06e31ec00954d506e2db2905cbbcb6685aca..53db17e1bceb77b7f8863f518ed01e4ecd58f763 100644
  */
 #undef ROTATE
 #ifndef PEDANTIC
-# if 0 /* defined(_MSC_VER) */
+# if defined(_MSC_VER) || defined(__ICC)
 #  define ROTATE(a,n)  _lrotl(a,n)
 # elif defined(__MWERKS__)
 #  if defined(__POWERPC__)
 #   define ROTATE(a,n) __rlwinm(a,n,0,31)
-#  elif defined(OPENSSL_SYSNAME_NETWARE)
-#   define ROTATE(a,n)  _lrotl(a,n)
 #  elif defined(__MC68K__)
     /* Motorola specific tweak. <appro@fy.chalmers.se> */
 #   define ROTATE(a,n) ( n<24 ? __rol(a,n) : __ror(a,32-n) )
    * Some GNU C inline assembler templates. Note that these are
    * rotates by *constant* number of bits! But that's exactly
    * what we need here...
-   *
    *                                   <appro@fy.chalmers.se>
    */
 #  if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
                        })
 #  endif
 # endif
-
-/*
- * Engage compiler specific "fetch in reverse byte order"
- * intrinsic function if available.
- */
-# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
-  /* some GNU C inline assembler templates by <appro@fy.chalmers.se> */
-#  if (defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)) && !defined(I386_ONLY)
-#   define BE_FETCH32(a)       ({ register unsigned int l=(a);\
-                               asm (                   \
-                               "bswapl %0"             \
-                               : "=r"(l) : "0"(l));    \
-                         l;                            \
-                       })
-#  elif defined(__powerpc)
-#   define LE_FETCH32(a)       ({ register unsigned int l;     \
-                               asm (                   \
-                               "lwbrx %0,0,%1"         \
-                               : "=r"(l)               \
-                               : "r"(a));              \
-                          l;                           \
-                       })
-
-#  elif defined(__sparc) && defined(OPENSSL_SYS_ULTRASPARC)
-#  define LE_FETCH32(a)        ({ register unsigned int l;             \
-                               asm (                           \
-                               "lda [%1]#ASI_PRIMARY_LITTLE,%0"\
-                               : "=r"(l)                       \
-                               : "r"(a));                      \
-                          l;                                   \
-                       })
-#  endif
-# endif
 #endif /* PEDANTIC */
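When none of the compiler-specific forms above applies, the generic
fallback defined further down in this header is a plain shift-and-or
rotate; roughly:

    #ifndef ROTATE
    # define ROTATE(a,n)   (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
    #endif

The &0xffffffff mask keeps the right-shifted half correct when HASH_LONG
is wider than 32 bits.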
 
 #if HASH_LONG_LOG2==2  /* Engage only if sizeof(HASH_LONG)== 4 */
 #    if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
 #      define HASH_BLOCK_DATA_ORDER_ALIGNED    HASH_BLOCK_HOST_ORDER
 #    endif
-#  elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
-#    ifndef HOST_FETCH32
-#      ifdef LE_FETCH32
-#        define HOST_FETCH32(p,l)      LE_FETCH32(p)
-#      elif defined(REVERSE_FETCH32)
-#        define HOST_FETCH32(p,l)      REVERSE_FETCH32(p,l)
-#      endif
-#    endif
 #  endif
 #elif defined(L_ENDIAN)
 #  if defined(DATA_ORDER_IS_LITTLE_ENDIAN)
 #    if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
 #      define HASH_BLOCK_DATA_ORDER_ALIGNED    HASH_BLOCK_HOST_ORDER
 #    endif
-#  elif defined(DATA_ORDER_IS_BIG_ENDIAN)
-#    ifndef HOST_FETCH32
-#      ifdef BE_FETCH32
-#        define HOST_FETCH32(p,l)      BE_FETCH32(p)
-#      elif defined(REVERSE_FETCH32)
-#        define HOST_FETCH32(p,l)      REVERSE_FETCH32(p,l)
-#      endif
-#    endif
 #  endif
 #endif
 
 
 #if defined(DATA_ORDER_IS_BIG_ENDIAN)
 
+#ifndef PEDANTIC
+# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
+#  if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
+    /*
+     * This gives ~30-40% performance improvement in SHA-256 compiled
+     * with gcc [on P4]. Well, the first macro does, to be frank. We can
+     * pull this trick on x86* platforms only, because these CPUs can fetch
+     * unaligned data without raising an exception.
+     */
+#   define HOST_c2l(c,l)       ({ (l)=*((const unsigned int *)(c));    \
+                                  asm ("bswapl %0":"=r"(l):"0"(l));    \
+                                  (c)+=4; (l);                         })
+#   define HOST_l2c(l,c)       ({ unsigned int r=(l);                  \
+                                  asm ("bswapl %0":"=r"(r):"0"(r));    \
+                                  *((unsigned int *)(c))=r; (c)+=4; r; })
+#  endif
+# endif
+#endif
+
+#ifndef HOST_c2l
 #define HOST_c2l(c,l)  (l =(((unsigned long)(*((c)++)))<<24),          \
                         l|=(((unsigned long)(*((c)++)))<<16),          \
                         l|=(((unsigned long)(*((c)++)))<< 8),          \
                         l|=(((unsigned long)(*((c)++)))    ),          \
                         l)
+#endif
 #define HOST_p_c2l(c,l,n)      {                                       \
                        switch (n) {                                    \
                        case 0: l =((unsigned long)(*((c)++)))<<24;     \
                        case 2: l|=((unsigned long)(*(--(c))))<<16;     \
                        case 1: l|=((unsigned long)(*(--(c))))<<24;     \
                                } }
+#ifndef HOST_l2c
 #define HOST_l2c(l,c)  (*((c)++)=(unsigned char)(((l)>>24)&0xff),      \
                         *((c)++)=(unsigned char)(((l)>>16)&0xff),      \
                         *((c)++)=(unsigned char)(((l)>> 8)&0xff),      \
                         *((c)++)=(unsigned char)(((l)    )&0xff),      \
                         l)
+#endif
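As a quick sanity check of the bswapl-based fetch above, a minimal
standalone program (GNU C on x86/x86_64 assumed; none of the names below
are part of the header) comparing it with the portable byte-by-byte form:

    #include <assert.h>
    #include <stdio.h>

    int main(void)
        {
        static const unsigned char buf[4] = { 0x01, 0x02, 0x03, 0x04 };
        const unsigned char *c = buf;
        unsigned int l_asm, l_ref;

        /* fetch-and-swap, as in the HOST_c2l variant added above */
        l_asm = *((const unsigned int *)c);
        asm ("bswapl %0" : "=r"(l_asm) : "0"(l_asm));

        /* portable big-endian fetch, as in the generic HOST_c2l */
        l_ref = ((unsigned int)c[0]<<24) | ((unsigned int)c[1]<<16) |
                ((unsigned int)c[2]<< 8) |  (unsigned int)c[3];

        assert(l_asm == l_ref);
        printf("0x%08x\n", l_asm);      /* prints 0x01020304 */
        return 0;
        }

On little-endian x86 the raw load yields 0x04030201 and bswapl restores
the big-endian value that the portable form computes directly.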
 
 #elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
 
+#if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
+  /* See comment in DATA_ORDER_IS_BIG_ENDIAN section. */
+# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, l)
+# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, l)
+#endif
+
+#ifndef HOST_c2l
 #define HOST_c2l(c,l)  (l =(((unsigned long)(*((c)++)))    ),          \
                         l|=(((unsigned long)(*((c)++)))<< 8),          \
                         l|=(((unsigned long)(*((c)++)))<<16),          \
                         l|=(((unsigned long)(*((c)++)))<<24),          \
                         l)
+#endif
 #define HOST_p_c2l(c,l,n)      {                                       \
                        switch (n) {                                    \
                        case 0: l =((unsigned long)(*((c)++)));         \
                        case 2: l|=((unsigned long)(*(--(c))))<< 8;     \
                        case 1: l|=((unsigned long)(*(--(c))));         \
                                } }
+#ifndef HOST_l2c
 #define HOST_l2c(l,c)  (*((c)++)=(unsigned char)(((l)    )&0xff),      \
                         *((c)++)=(unsigned char)(((l)>> 8)&0xff),      \
                         *((c)++)=(unsigned char)(((l)>>16)&0xff),      \
                         *((c)++)=(unsigned char)(((l)>>24)&0xff),      \
                         l)
+#endif
 
 #endif
 
  * Time for some action:-)
  */
 
-int HASH_UPDATE (HASH_CTX *c, const void *data_, unsigned long len)
+int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
        {
        const unsigned char *data=data_;
        register HASH_LONG * p;
-       register unsigned long l;
-       int sw,sc,ew,ec;
+       register HASH_LONG l;
+       unsigned int sw,sc,ew,ec;
 
        if (len==0) return 1;
 
-       l=(c->Nl+(len<<3))&0xffffffffL;
+       l=(c->Nl+(((HASH_LONG)len)<<3))&0xffffffffUL;
        /* 95-05-24 eay Fixed a bug with the overflow handling, thanks to
         * Wei Dai <weidai@eskimo.com> for pointing it out. */
        if (l < c->Nl) /* overflow */
                c->Nh++;
-       c->Nh+=(len>>29);
+       c->Nh+=(len>>29);       /* might cause compiler warning on 16-bit */
        c->Nl=l;
 
        if (c->num != 0)
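The Nl/Nh update above keeps a 64-bit count of hashed bits in two 32-bit
halves. The same arithmetic in isolation (a sketch; the function and
variable names are illustrative, not part of the header):

    #include <stddef.h>
    #include <stdint.h>

    /* lo/hi together hold the running bit count; len is in bytes */
    static void add_bit_count(uint32_t *lo, uint32_t *hi, size_t len)
        {
        uint32_t l = (*lo + ((uint32_t)len << 3)) & 0xffffffffUL;

        if (l < *lo)                    /* low word wrapped around */
            (*hi)++;
        *hi += (uint32_t)(len >> 29);   /* upper bits of len*8 */
        *lo  = l;
        }

The len>>29 term is what the "compiler warning on 16-bit" note above
refers to: with a 16-bit size_t the shift count exceeds the width of len.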
@@ -483,7 +464,7 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, unsigned long len)
                 * Note that HASH_BLOCK_DATA_ORDER_ALIGNED gets defined
                 * only if sizeof(HASH_LONG)==4.
                 */
-               if ((((unsigned long)data)%4) == 0)
+               if ((((size_t)data)%4) == 0)
                        {
                        /* data is properly aligned so that we can cast it: */
                        HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,sw);
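The alignment test casts through size_t so the integer matches the
pointer width even on LLP64 targets (64-bit Windows), where unsigned long
is only 32 bits wide. In isolation the check is just (illustrative name):

    #include <stddef.h>

    /* non-zero if p may be read as an array of 4-byte HASH_LONG words */
    static int is_word_aligned(const void *p)
        {
        return ((size_t)p % 4) == 0;
        }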
@@ -532,7 +513,7 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, unsigned long len)
 void HASH_TRANSFORM (HASH_CTX *c, const unsigned char *data)
        {
 #if defined(HASH_BLOCK_DATA_ORDER_ALIGNED)
-       if ((((unsigned long)data)%4) == 0)
+       if ((((size_t)data)%4) == 0)
                /* data is properly aligned so that we can cast it: */
                HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,1);
        else