X-Git-Url: https://git.openssl.org/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fmd5%2Fmd5_locl.h;h=2f5b045f36cb94e07d56b52dc17805c87b51c161;hp=2962e773e3b01ef06eaff8da06fa7645bc65a920;hb=73a948508131ad3711b7f6e89d270776e2569e01;hpb=ec577822f95a8bca0023c5c77cef1a4916822d4a

diff --git a/crypto/md5/md5_locl.h b/crypto/md5/md5_locl.h
index 2962e773e3..2f5b045f36 100644
--- a/crypto/md5/md5_locl.h
+++ b/crypto/md5/md5_locl.h
@@ -56,109 +56,94 @@
  * [including the GNU Public Licence.]
  */
 
-/* On sparc, this actually slows things down :-( */
-#if defined(sun)
-#undef B_ENDIAN
-#endif
-
 #include <stdlib.h>
 #include <string.h>
+#include <openssl/e_os2.h>
 #include <openssl/md5.h>
 
-#define ULONG   unsigned long
-#define UCHAR   unsigned char
-#define UINT    unsigned int
-
-#if defined(NOCONST)
-#define const
+#ifndef MD5_LONG_LOG2
+#define MD5_LONG_LOG2 2 /* default to 32 bits */
 #endif
 
-#undef c2l
-#define c2l(c,l)        (l = ((unsigned long)(*((c)++)))     , \
-                         l|=(((unsigned long)(*((c)++)))<< 8), \
-                         l|=(((unsigned long)(*((c)++)))<<16), \
-                         l|=(((unsigned long)(*((c)++)))<<24))
-
-#undef p_c2l
-#define p_c2l(c,l,n)    { \
-                        switch (n) { \
-                        case 0: l =((unsigned long)(*((c)++))); \
-                        case 1: l|=((unsigned long)(*((c)++)))<< 8; \
-                        case 2: l|=((unsigned long)(*((c)++)))<<16; \
-                        case 3: l|=((unsigned long)(*((c)++)))<<24; \
-                                } \
-                        }
+#ifdef MD5_ASM
+# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__)
+#  define md5_block_host_order md5_block_asm_host_order
+# elif defined(__sparc) && defined(OPENSSL_SYS_ULTRASPARC)
+   void md5_block_asm_data_order_aligned (MD5_CTX *c, const MD5_LONG *p,size_t num);
+#  define HASH_BLOCK_DATA_ORDER_ALIGNED md5_block_asm_data_order_aligned
+# endif
+#endif
 
-/* NOTE the pointer is not incremented at the end of this */
-#undef c2l_p
-#define c2l_p(c,l,n)    { \
-                        l=0; \
-                        (c)+=n; \
-                        switch (n) { \
-                        case 3: l =((unsigned long)(*(--(c))))<<16; \
-                        case 2: l|=((unsigned long)(*(--(c))))<< 8; \
-                        case 1: l|=((unsigned long)(*(--(c))))     ; \
-                                } \
-                        }
+void md5_block_host_order (MD5_CTX *c, const void *p,size_t num);
+void md5_block_data_order (MD5_CTX *c, const void *p,size_t num);
 
-#undef p_c2l_p
-#define p_c2l_p(c,l,sc,len) { \
-                        switch (sc) \
-                                { \
-                        case 0: l =((unsigned long)(*((c)++))); \
-                                if (--len == 0) break; \
-                        case 1: l|=((unsigned long)(*((c)++)))<< 8; \
-                                if (--len == 0) break; \
-                        case 2: l|=((unsigned long)(*((c)++)))<<16; \
-                                } \
-                        }
+#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__)
+/*
+ * *_block_host_order is expected to handle aligned data, while
+ * *_block_data_order handles unaligned data. As the algorithm and
+ * the host (x86) have the same endianness in this case, the two
+ * are otherwise indistinguishable. But normally you don't want to
+ * call the same function, because unaligned access in places
+ * where alignment is expected is usually a "Bad Thing". Indeed,
+ * on RISCs you get punished with a BUS ERROR signal or *severe*
+ * performance degradation. Intel CPUs, in turn, are perfectly
+ * capable of loading unaligned data without such a drastic side
+ * effect. Yes, they say it's slower than an aligned load, but no
+ * exception is generated, so the performance degradation is
+ * *incomparable* with RISCs. What we should weigh here is the
+ * cost of unaligned access against the cost of aligning data.
+ * According to my measurements, allowing unaligned access results
+ * in a ~9% performance improvement on a Pentium II operating at
+ * 266MHz.
+ * I won't be surprised if the difference is higher
+ * on faster systems:-)
+ *
+ *
+ */
+#define md5_block_data_order md5_block_host_order
+#endif
 
-#undef l2c
-#define l2c(l,c)        (*((c)++)=(unsigned char)(((l)     )&0xff), \
-                         *((c)++)=(unsigned char)(((l)>> 8)&0xff), \
-                         *((c)++)=(unsigned char)(((l)>>16)&0xff), \
-                         *((c)++)=(unsigned char)(((l)>>24)&0xff))
+#define DATA_ORDER_IS_LITTLE_ENDIAN
+
+#define HASH_LONG               MD5_LONG
+#define HASH_LONG_LOG2          MD5_LONG_LOG2
+#define HASH_CTX                MD5_CTX
+#define HASH_CBLOCK             MD5_CBLOCK
+#define HASH_LBLOCK             MD5_LBLOCK
+#define HASH_UPDATE             MD5_Update
+#define HASH_TRANSFORM          MD5_Transform
+#define HASH_FINAL              MD5_Final
+#define HASH_MAKE_STRING(c,s)   do {    \
+        unsigned long ll;               \
+        ll=(c)->A; HOST_l2c(ll,(s));    \
+        ll=(c)->B; HOST_l2c(ll,(s));    \
+        ll=(c)->C; HOST_l2c(ll,(s));    \
+        ll=(c)->D; HOST_l2c(ll,(s));    \
+        } while (0)
+#define HASH_BLOCK_HOST_ORDER   md5_block_host_order
+#if !defined(L_ENDIAN) || defined(md5_block_data_order)
+#define HASH_BLOCK_DATA_ORDER   md5_block_data_order
+/*
+ * Little-endians (Intel and Alpha) feel better without this.
+ * It looks like memcpy does a better job than the generic
+ * md5_block_data_order at copying-and-aligning input data.
+ * But frankly speaking, I didn't expect such a result on Alpha.
+ * On the other hand, I got this with egcs-1.0.2, and if the
+ * program is compiled with another (better?) compiler it
+ * might turn out the other way around.
+ *
+ *
+ */
+#endif
 
-/* NOTE - c is not incremented as per l2c */
-#undef l2cn
-#define l2cn(l1,l2,c,n) { \
-                        c+=n; \
-                        switch (n) { \
-                        case 8: *(--(c))=(unsigned char)(((l2)>>24)&0xff); \
-                        case 7: *(--(c))=(unsigned char)(((l2)>>16)&0xff); \
-                        case 6: *(--(c))=(unsigned char)(((l2)>> 8)&0xff); \
-                        case 5: *(--(c))=(unsigned char)(((l2)    )&0xff); \
-                        case 4: *(--(c))=(unsigned char)(((l1)>>24)&0xff); \
-                        case 3: *(--(c))=(unsigned char)(((l1)>>16)&0xff); \
-                        case 2: *(--(c))=(unsigned char)(((l1)>> 8)&0xff); \
-                        case 1: *(--(c))=(unsigned char)(((l1)    )&0xff); \
-                                } \
-                        }
+#include "md32_common.h"
 
-/* A nice byte order reversal from Wei Dai */
-#if defined(WIN32)
-/* 5 instructions with rotate instruction, else 9 */
-#define Endian_Reverse32(a) \
-        { \
-        unsigned long l=(a); \
-        (a)=((ROTATE(l,8)&0x00FF00FF)|(ROTATE(l,24)&0xFF00FF00)); \
-        }
-#else
-/* 6 instructions with rotate instruction, else 8 */
-#define Endian_Reverse32(a) \
-        { \
-        unsigned long l=(a); \
-        l=(((l&0xFF00FF00)>>8L)|((l&0x00FF00FF)<<8L)); \
-        (a)=ROTATE(l,16L); \
-        }
-#endif
 
 /*
 #define F(x,y,z) (((x) & (y)) | ((~(x)) & (z)))
 #define G(x,y,z) (((x) & (z)) | ((y) & (~(z))))
 */
 
 /* As pointed out by Wei Dai, the above can be
- * simplified to the code below. Wei attributes these optimisations
+ * simplified to the code below. Wei attributes these optimizations
  * to Peter Gutmann's SHS code, and he attributes it to Rich Schroeppel.
  */
 #define F(b,c,d) ((((c) ^ (d)) & (b)) ^ (d))
@@ -166,14 +151,6 @@
 #define H(b,c,d) ((b) ^ (c) ^ (d))
 #define I(b,c,d) (((~(d)) | (b)) ^ (c))
 
-#undef ROTATE
-#if defined(WIN32)
-#define ROTATE(a,n) _lrotl(a,n)
-#else
-#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
-#endif
-
-
 #define R0(a,b,c,d,k,s,t) { \
         a+=((k)+(t)+F((b),(c),(d))); \
         a=ROTATE(a,s); \
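
The hunk above keeps Wei Dai's simplified Boolean function F in place of the commented-out textbook definition. As a quick sanity check, not part of the patch, the following standalone sketch verifies that the simplified F is equivalent to the reference form; the macro names F_REF and F_OPT are illustrative and do not appear in md5_locl.h. Because both forms operate bitwise, exhausting a single bit position of each operand is enough.

#include <stdio.h>

/* Reference definition, as in the commented-out block in md5_locl.h. */
#define F_REF(x,y,z) (((x) & (y)) | ((~(x)) & (z)))

/* Simplified form used by md5_locl.h, attributed via Wei Dai and
 * Peter Gutmann's SHS code to Rich Schroeppel. */
#define F_OPT(b,c,d) ((((c) ^ (d)) & (b)) ^ (d))

int main(void)
{
    unsigned int b, c, d;

    /* Try every combination of one bit per operand; the functions
     * act independently on each bit of a 32-bit word. */
    for (b = 0; b < 2; b++)
        for (c = 0; c < 2; c++)
            for (d = 0; d < 2; d++)
                if ((F_REF(b, c, d) & 1) != (F_OPT(b, c, d) & 1)) {
                    printf("mismatch at b=%u c=%u d=%u\n", b, c, d);
                    return 1;
                }

    printf("simplified F matches the reference definition\n");
    return 0;
}

The same style of check extends to G, H and I. The practical gain of the simplified F is three bitwise operations instead of four and no separate negation, which adds up because F is evaluated sixteen times per 64-byte input block.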