Some assembler-related clean-ups.

[openssl.git] / crypto / md5 / md5_locl.h
diff --git a/crypto/md5/md5_locl.h b/crypto/md5/md5_locl.h

index dbbe1b71ca58855835de5e8896a69768af459863..222639788f28d31152a52114523d3e7c67c2eb1d 100644 (file)
--- a/crypto/md5/md5_locl.h
+++ b/crypto/md5/md5_locl.h
@@ -56,102 +56,84 @@
   * [including the GNU Public Licence.]
   */
  
-/* On sparc, this actually slows things down :-( */
-#if defined(sun)
-#undef B_ENDIAN
-#endif
-
  #include <stdlib.h>
  #include <string.h>
-#include "md5.h"
-
-#define ULONG  unsigned long
-#define UCHAR  unsigned char
-#define UINT   unsigned int
+#include <openssl/opensslconf.h>
+#include <openssl/md5.h>
  
-#if defined(NOCONST)
-#define const
+#ifndef MD5_LONG_LOG2
+#define MD5_LONG_LOG2 2 /* default to 32 bits */
  #endif
  
-#undef c2l
-#define c2l(c,l)       (l = ((unsigned long)(*((c)++)))     , \
-                        l|=(((unsigned long)(*((c)++)))<< 8), \
-                        l|=(((unsigned long)(*((c)++)))<<16), \
-                        l|=(((unsigned long)(*((c)++)))<<24))
-
-#undef p_c2l
-#define p_c2l(c,l,n)   { \
-                       switch (n) { \
-                       case 0: l =((unsigned long)(*((c)++))); \
-                       case 1: l|=((unsigned long)(*((c)++)))<< 8; \
-                       case 2: l|=((unsigned long)(*((c)++)))<<16; \
-                       case 3: l|=((unsigned long)(*((c)++)))<<24; \
-                               } \
-                       }
-
-/* NOTE the pointer is not incremented at the end of this */
-#undef c2l_p
-#define c2l_p(c,l,n)   { \
-                       l=0; \
-                       (c)+=n; \
-                       switch (n) { \
-                       case 3: l =((unsigned long)(*(--(c))))<<16; \
-                       case 2: l|=((unsigned long)(*(--(c))))<< 8; \
-                       case 1: l|=((unsigned long)(*(--(c))))    ; \
-                               } \
-                       }
+#ifdef MD5_ASM
+# if defined(__i386) || defined(WIN32)
+#  define md5_block_host_order md5_block_asm_host_order
+# elif defined(__sparc) && defined(ULTRASPARC)
+   void md5_block_asm_data_order_aligned (MD5_CTX *c, const MD5_LONG *p,int num);
+#  define HASH_BLOCK_DATA_ORDER_ALIGNED md5_block_asm_data_order_aligned
+# endif
+#endif
  
-#undef p_c2l_p
-#define p_c2l_p(c,l,sc,len) { \
-                       switch (sc) \
-                               { \
-                       case 0: l =((unsigned long)(*((c)++))); \
-                               if (--len == 0) break; \
-                       case 1: l|=((unsigned long)(*((c)++)))<< 8; \
-                               if (--len == 0) break; \
-                       case 2: l|=((unsigned long)(*((c)++)))<<16; \
-                               } \
-                       }
+void md5_block_host_order (MD5_CTX *c, const void *p,int num);
+void md5_block_data_order (MD5_CTX *c, const void *p,int num);
  
-#undef l2c
-#define l2c(l,c)       (*((c)++)=(unsigned char)(((l)    )&0xff), \
-                        *((c)++)=(unsigned char)(((l)>> 8)&0xff), \
-                        *((c)++)=(unsigned char)(((l)>>16)&0xff), \
-                        *((c)++)=(unsigned char)(((l)>>24)&0xff))
+#if defined(__i386)
+/*
+ * *_block_host_order is expected to handle aligned data while
+ * *_block_data_order - unaligned. As algorithm and host (x86)
+ * are in this case of the same "endianess" these two are
+ * otherwise indistinguishable. But normally you don't want to
+ * call the same function because unaligned access in places
+ * where alignment is expected is usually a "Bad Thing". Indeed,
+ * on RISCs you get punished with BUS ERROR signal or *severe*
+ * performance degradation. Intel CPUs are in turn perfectly
+ * capable of loading unaligned data without such drastic side
+ * effect. Yes, they say it's slower than aligned load, but no
+ * exception is generated and therefore performance degradation
+ * is *incomparable* with RISCs. What we should weight here is
+ * costs of unaligned access against costs of aligning data.
+ * According to my measurements allowing unaligned access results
+ * in ~9% performance improvement on Pentium II operating at
+ * 266MHz. I won't be surprised if the difference will be higher
+ * on faster systems:-)
+ *
+ *                             <appro@fy.chalmers.se>
+ */
+#define md5_block_data_order md5_block_host_order
+#endif
  
-/* NOTE - c is not incremented as per l2c */
-#undef l2cn
-#define l2cn(l1,l2,c,n)        { \
-                       c+=n; \
-                       switch (n) { \
-                       case 8: *(--(c))=(unsigned char)(((l2)>>24)&0xff); \
-                       case 7: *(--(c))=(unsigned char)(((l2)>>16)&0xff); \
-                       case 6: *(--(c))=(unsigned char)(((l2)>> 8)&0xff); \
-                       case 5: *(--(c))=(unsigned char)(((l2)    )&0xff); \
-                       case 4: *(--(c))=(unsigned char)(((l1)>>24)&0xff); \
-                       case 3: *(--(c))=(unsigned char)(((l1)>>16)&0xff); \
-                       case 2: *(--(c))=(unsigned char)(((l1)>> 8)&0xff); \
-                       case 1: *(--(c))=(unsigned char)(((l1)    )&0xff); \
-                               } \
-                       }
+#define DATA_ORDER_IS_LITTLE_ENDIAN
+
+#define HASH_LONG              MD5_LONG
+#define HASH_LONG_LOG2         MD5_LONG_LOG2
+#define HASH_CTX               MD5_CTX
+#define HASH_CBLOCK            MD5_CBLOCK
+#define HASH_LBLOCK            MD5_LBLOCK
+#define HASH_UPDATE            MD5_Update
+#define HASH_TRANSFORM         MD5_Transform
+#define HASH_FINAL             MD5_Final
+#define HASH_BLOCK_HOST_ORDER  md5_block_host_order
+#if !defined(L_ENDIAN) || defined(md5_block_data_order)
+#define        HASH_BLOCK_DATA_ORDER   md5_block_data_order
+/*
+ * Little-endians (Intel and Alpha) feel better without this.
+ * It looks like memcpy does better job than generic
+ * md5_block_data_order on copying-n-aligning input data.
+ * But franlky speaking I didn't expect such result on Alpha.
+ * On the other hand I've got this with egcs-1.0.2 and if
+ * program is compiled with another (better?) compiler it
+ * might turn out other way around.
+ *
+ *                             <appro@fy.chalmers.se>
+ */
+#endif
  
-/* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */
-#if defined(WIN32)
-/* 5 instructions with rotate instruction, else 9 */
-#define Endian_Reverse32(a) \
-       { \
-       unsigned long l=(a); \
-       (a)=((ROTATE(l,8)&0x00FF00FF)|(ROTATE(l,24)&0xFF00FF00)); \
-       }
+#ifndef FLAT_INC
+#include "../md32_common.h"
  #else
-/* 6 instructions with rotate instruction, else 8 */
-#define Endian_Reverse32(a) \
-       { \
-       unsigned long l=(a); \
-       l=(((l&0xFF00FF00)>>8L)|((l&0x00FF00FF)<<8L)); \
-       (a)=ROTATE(l,16L); \
-       }
+#include "md32_common.h"
  #endif
+
  /*
  #define        F(x,y,z)        (((x) & (y))  |  ((~(x)) & (z)))
  #define        G(x,y,z)        (((x) & (z))  |  ((y) & (~(z))))
@@ -166,14 +148,6 @@
  #define        H(b,c,d)        ((b) ^ (c) ^ (d))
  #define        I(b,c,d)        (((~(d)) | (b)) ^ (c))
  
-#undef ROTATE
-#if defined(WIN32)
-#define ROTATE(a,n)     _lrotl(a,n)
-#else
-#define ROTATE(a,n)     (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
-#endif
-
-
  #define R0(a,b,c,d,k,s,t) { \
         a+=((k)+(t)+F((b),(c),(d))); \
         a=ROTATE(a,s); \