X-Git-Url: https://git.openssl.org/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fmd5%2Fmd5_locl.h;h=2f5b045f36cb94e07d56b52dc17805c87b51c161;hp=2962e773e3b01ef06eaff8da06fa7645bc65a920;hb=73a948508131ad3711b7f6e89d270776e2569e01;hpb=ec577822f95a8bca0023c5c77cef1a4916822d4a

diff --git a/crypto/md5/md5_locl.h b/crypto/md5/md5_locl.h
index 2962e773e3..2f5b045f36 100644
--- a/crypto/md5/md5_locl.h
+++ b/crypto/md5/md5_locl.h
@@ -56,109 +56,94 @@
  * [including the GNU Public Licence.]
  */
 
-/* On sparc, this actually slows things down :-( */
-#if defined(sun)
-#undef B_ENDIAN
-#endif
-
 #include <stdlib.h>
 #include <string.h>
+#include <openssl/e_os2.h>
 #include <openssl/md5.h>
 
-#define ULONG   unsigned long
-#define UCHAR   unsigned char
-#define UINT    unsigned int
-
-#if defined(NOCONST)
-#define const
+#ifndef MD5_LONG_LOG2
+#define MD5_LONG_LOG2 2 /* default to 32 bits */
 #endif
 
-#undef c2l
-#define c2l(c,l)        (l = ((unsigned long)(*((c)++)))     , \
-                         l|=(((unsigned long)(*((c)++)))<< 8), \
-                         l|=(((unsigned long)(*((c)++)))<<16), \
-                         l|=(((unsigned long)(*((c)++)))<<24))
-
-#undef p_c2l
-#define p_c2l(c,l,n)    { \
-                        switch (n) { \
-                        case 0: l =((unsigned long)(*((c)++))); \
-                        case 1: l|=((unsigned long)(*((c)++)))<< 8; \
-                        case 2: l|=((unsigned long)(*((c)++)))<<16; \
-                        case 3: l|=((unsigned long)(*((c)++)))<<24; \
-                                } \
-                        }
+#ifdef MD5_ASM
+# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__)
+#  define md5_block_host_order md5_block_asm_host_order
+# elif defined(__sparc) && defined(OPENSSL_SYS_ULTRASPARC)
+   void md5_block_asm_data_order_aligned (MD5_CTX *c, const MD5_LONG *p,size_t num);
+#  define HASH_BLOCK_DATA_ORDER_ALIGNED md5_block_asm_data_order_aligned
+# endif
+#endif
 
-/* NOTE the pointer is not incremented at the end of this */
-#undef c2l_p
-#define c2l_p(c,l,n)    { \
-                        l=0; \
-                        (c)+=n; \
-                        switch (n) { \
-                        case 3: l =((unsigned long)(*(--(c))))<<16; \
-                        case 2: l|=((unsigned long)(*(--(c))))<< 8; \
-                        case 1: l|=((unsigned long)(*(--(c))))     ; \
-                                } \
-                        }
+void md5_block_host_order (MD5_CTX *c, const void *p,size_t num);
+void md5_block_data_order (MD5_CTX *c, const void *p,size_t num);
 
-#undef p_c2l_p
-#define p_c2l_p(c,l,sc,len) { \
-                        switch (sc) \
-                                { \
-                        case 0: l =((unsigned long)(*((c)++))); \
-                                if (--len == 0) break; \
-                        case 1: l|=((unsigned long)(*((c)++)))<< 8; \
-                                if (--len == 0) break; \
-                        case 2: l|=((unsigned long)(*((c)++)))<<16; \
-                                } \
-                        }
+#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__)
+/*
+ * *_block_host_order is expected to handle aligned data, while
+ * *_block_data_order handles unaligned data. As the algorithm and
+ * the host (x86) have the same endianness in this case, the two
+ * are otherwise indistinguishable. But normally you don't want to
+ * call the same function, because unaligned access in places
+ * where alignment is expected is usually a "Bad Thing". Indeed,
+ * on RISCs you get punished with a BUS ERROR signal or *severe*
+ * performance degradation. Intel CPUs, in turn, are perfectly
+ * capable of loading unaligned data without such a drastic side
+ * effect. Yes, they say it's slower than an aligned load, but no
+ * exception is generated, so the performance degradation is
+ * *incomparable* with RISCs. What we should weigh here is the
+ * cost of unaligned access against the cost of aligning data.
+ * According to my measurements, allowing unaligned access results
+ * in a ~9% performance improvement on a Pentium II operating at
+ * 266MHz.
+ * I won't be surprised if the difference is higher
+ * on faster systems:-)
+ *
+ *
+ */
+#define md5_block_data_order md5_block_host_order
+#endif
 
-#undef l2c
-#define l2c(l,c)        (*((c)++)=(unsigned char)(((l)     )&0xff), \
-                         *((c)++)=(unsigned char)(((l)>> 8)&0xff), \
-                         *((c)++)=(unsigned char)(((l)>>16)&0xff), \
-                         *((c)++)=(unsigned char)(((l)>>24)&0xff))
+#define DATA_ORDER_IS_LITTLE_ENDIAN
+
+#define HASH_LONG               MD5_LONG
+#define HASH_LONG_LOG2          MD5_LONG_LOG2
+#define HASH_CTX                MD5_CTX
+#define HASH_CBLOCK             MD5_CBLOCK
+#define HASH_LBLOCK             MD5_LBLOCK
+#define HASH_UPDATE             MD5_Update
+#define HASH_TRANSFORM          MD5_Transform
+#define HASH_FINAL              MD5_Final
+#define HASH_MAKE_STRING(c,s)   do {    \
+        unsigned long ll;               \
+        ll=(c)->A; HOST_l2c(ll,(s));    \
+        ll=(c)->B; HOST_l2c(ll,(s));    \
+        ll=(c)->C; HOST_l2c(ll,(s));    \
+        ll=(c)->D; HOST_l2c(ll,(s));    \
+        } while (0)
+#define HASH_BLOCK_HOST_ORDER   md5_block_host_order
+#if !defined(L_ENDIAN) || defined(md5_block_data_order)
+#define HASH_BLOCK_DATA_ORDER   md5_block_data_order
+/*
+ * Little-endians (Intel and Alpha) feel better without this.
+ * It looks like memcpy does a better job than the generic
+ * md5_block_data_order at copying-and-aligning input data.
+ * But frankly speaking, I didn't expect such a result on Alpha.
+ * On the other hand, I got this with egcs-1.0.2, and if the
+ * program is compiled with another (better?) compiler it
+ * might turn out the other way around.
+ *
+ *
+ */
+#endif
 
-/* NOTE - c is not incremented as per l2c */
-#undef l2cn
-#define l2cn(l1,l2,c,n) { \
-                        c+=n; \
-                        switch (n) { \
-                        case 8: *(--(c))=(unsigned char)(((l2)>>24)&0xff); \
-                        case 7: *(--(c))=(unsigned char)(((l2)>>16)&0xff); \
-                        case 6: *(--(c))=(unsigned char)(((l2)>> 8)&0xff); \
-                        case 5: *(--(c))=(unsigned char)(((l2)    )&0xff); \
-                        case 4: *(--(c))=(unsigned char)(((l1)>>24)&0xff); \
-                        case 3: *(--(c))=(unsigned char)(((l1)>>16)&0xff); \
-                        case 2: *(--(c))=(unsigned char)(((l1)>> 8)&0xff); \
-                        case 1: *(--(c))=(unsigned char)(((l1)    )&0xff); \
-                                } \
-                        }
+#include "md32_common.h"
 
-/* A nice byte order reversal from Wei Dai */
-#if defined(WIN32)
-/* 5 instructions with rotate instruction, else 9 */
-#define Endian_Reverse32(a) \
-        { \
-        unsigned long l=(a); \
-        (a)=((ROTATE(l,8)&0x00FF00FF)|(ROTATE(l,24)&0xFF00FF00)); \
-        }
-#else
-/* 6 instructions with rotate instruction, else 8 */
-#define Endian_Reverse32(a) \
-        { \
-        unsigned long l=(a); \
-        l=(((l&0xFF00FF00)>>8L)|((l&0x00FF00FF)<<8L)); \
-        (a)=ROTATE(l,16L); \
-        }
-#endif
 
 /*
 #define F(x,y,z) (((x) & (y)) | ((~(x)) & (z)))
 #define G(x,y,z) (((x) & (z)) | ((y) & (~(z))))
 */
 
 /* As pointed out by Wei Dai, the above can be
- * simplified to the code below. Wei attributes these optimisations
+ * simplified to the code below. Wei attributes these optimizations
  * to Peter Gutmann's SHS code, and he attributes it to Rich Schroeppel.
  */
 #define F(b,c,d) ((((c) ^ (d)) & (b)) ^ (d))
@@ -166,14 +151,6 @@
 #define H(b,c,d) ((b) ^ (c) ^ (d))
 #define I(b,c,d) (((~(d)) | (b)) ^ (c))
 
-#undef ROTATE
-#if defined(WIN32)
-#define ROTATE(a,n) _lrotl(a,n)
-#else
-#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
-#endif
-
-
 #define R0(a,b,c,d,k,s,t) { \
         a+=((k)+(t)+F((b),(c),(d))); \
         a=ROTATE(a,s); \
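
The hunk above keeps Wei Dai's simplified Boolean function F in place of the commented-out textbook definition. As a quick sanity check, not part of the patch, the following standalone sketch verifies that the simplified F is equivalent to the reference form; the macro names F_REF and F_OPT are illustrative and do not appear in md5_locl.h. Because both forms operate bitwise, exhausting a single bit position of each operand is enough.

#include <stdio.h>

/* Reference definition, as in the commented-out block in md5_locl.h. */
#define F_REF(x,y,z) (((x) & (y)) | ((~(x)) & (z)))

/* Simplified form used by md5_locl.h, attributed via Wei Dai and
 * Peter Gutmann's SHS code to Rich Schroeppel. */
#define F_OPT(b,c,d) ((((c) ^ (d)) & (b)) ^ (d))

int main(void)
{
    unsigned int b, c, d;

    /* Try every combination of one bit per operand; the functions
     * act independently on each bit of a 32-bit word. */
    for (b = 0; b < 2; b++)
        for (c = 0; c < 2; c++)
            for (d = 0; d < 2; d++)
                if ((F_REF(b, c, d) & 1) != (F_OPT(b, c, d) & 1)) {
                    printf("mismatch at b=%u c=%u d=%u\n", b, c, d);
                    return 1;
                }

    printf("simplified F matches the reference definition\n");
    return 0;
}

The same style of check extends to G, H and I. The practical gain of the simplified F is three bitwise operations instead of four and no separate negation, which adds up because F is evaluated sixteen times per 64-byte input block.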