Re-implement md32_common.h [make it simpler!] and eliminate code rendered
author: Andy Polyakov <appro@openssl.org>
Wed, 11 Oct 2006 11:55:11 +0000 (11:55 +0000)
committer: Andy Polyakov <appro@openssl.org>
Wed, 11 Oct 2006 11:55:11 +0000 (11:55 +0000)
redundant as result.

12 files changed:
crypto/md32_common.h
crypto/md4/md4_dgst.c
crypto/md4/md4_locl.h
crypto/md5/asm/md5-586.pl
crypto/md5/asm/md5-x86_64.pl
crypto/md5/md5_dgst.c
crypto/md5/md5_locl.h
crypto/ripemd/asm/rmd-586.pl
crypto/ripemd/rmd_dgst.c
crypto/ripemd/rmd_locl.h
crypto/sha/sha256.c
crypto/sha/sha_locl.h

index 3ed16f3..ce956df 100644 (file)
@@ -1,6 +1,6 @@
 /* crypto/md32_common.h */
 /* ====================================================================
- * Copyright (c) 1999-2002 The OpenSSL Project.  All rights reserved.
+ * Copyright (c) 1999-2006 The OpenSSL Project.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * OF THE POSSIBILITY OF SUCH DAMAGE.
  * ====================================================================
  *
- * This product includes cryptographic software written by Eric Young
- * (eay@cryptsoft.com).  This product includes software written by Tim
- * Hudson (tjh@cryptsoft.com).
- *
  */
 
 /*
  *             typedef struct {
  *                     ...
  *                     HASH_LONG       Nl,Nh;
+ *                     either {
  *                     HASH_LONG       data[HASH_LBLOCK];
+ *                     unsigned char   data[HASH_CBLOCK];
+ *                     };
  *                     unsigned int    num;
  *                     ...
  *                     } HASH_CTX;
+ *     data[] vector is expected to be zeroed upon first call to
+ *     HASH_UPDATE.
  * HASH_UPDATE
  *     name of "Update" function, implemented here.
  * HASH_TRANSFORM
  *     name of "Transform" function, implemented here.
  * HASH_FINAL
  *     name of "Final" function, implemented here.
- * HASH_BLOCK_HOST_ORDER
- *     name of "block" function treating *aligned* input message
- *     in host byte order, implemented externally.
  * HASH_BLOCK_DATA_ORDER
- *     name of "block" function treating *unaligned* input message
- *     in original (data) byte order, implemented externally (it
- *     actually is optional if data and host are of the same
- *     "endianess").
+ *     name of "block" function capable of treating *unaligned* input
+ *     message in original (data) byte order, implemented externally.
  * HASH_MAKE_STRING
  *     macro convering context variables to an ASCII hash string.
  *
- * Optional macros:
- *
- * B_ENDIAN or L_ENDIAN
- *     defines host byte-order.
- * HASH_LONG_LOG2
- *     defaults to 2 if not states otherwise.
- * HASH_LBLOCK
- *     assumed to be HASH_CBLOCK/4 if not stated otherwise.
- * HASH_BLOCK_DATA_ORDER_ALIGNED
- *     alternative "block" function capable of treating
- *     aligned input message in original (data) order,
- *     implemented externally.
- *
  * MD5 example:
  *
  *     #define DATA_ORDER_IS_LITTLE_ENDIAN
  *     #define HASH_LONG_LOG2          MD5_LONG_LOG2
  *     #define HASH_CTX                MD5_CTX
  *     #define HASH_CBLOCK             MD5_CBLOCK
- *     #define HASH_LBLOCK             MD5_LBLOCK
  *     #define HASH_UPDATE             MD5_Update
  *     #define HASH_TRANSFORM          MD5_Transform
  *     #define HASH_FINAL              MD5_Final
- *     #define HASH_BLOCK_HOST_ORDER   md5_block_host_order
  *     #define HASH_BLOCK_DATA_ORDER   md5_block_data_order
  *
  *                                     <appro@fy.chalmers.se>
 #error "HASH_FINAL must be defined!"
 #endif
 
-#ifndef HASH_BLOCK_HOST_ORDER
-#error "HASH_BLOCK_HOST_ORDER must be defined!"
-#endif
-
-#if 0
-/*
- * Moved below as it's required only if HASH_BLOCK_DATA_ORDER_ALIGNED
- * isn't defined.
- */
 #ifndef HASH_BLOCK_DATA_ORDER
 #error "HASH_BLOCK_DATA_ORDER must be defined!"
 #endif
-#endif
-
-#ifndef HASH_LBLOCK
-#define HASH_LBLOCK    (HASH_CBLOCK/4)
-#endif
-
-#ifndef HASH_LONG_LOG2
-#define HASH_LONG_LOG2 2
-#endif
 
 /*
  * Engage compiler specific rotate intrinsic function if available.
 # endif
 #endif /* PEDANTIC */
 
-#if HASH_LONG_LOG2==2  /* Engage only if sizeof(HASH_LONG)== 4 */
-/* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */
-#ifdef ROTATE
-/* 5 instructions with rotate instruction, else 9 */
-#define REVERSE_FETCH32(a,l)   (                                       \
-               l=*(const HASH_LONG *)(a),                              \
-               ((ROTATE(l,8)&0x00FF00FF)|(ROTATE((l&0x00FF00FF),24)))  \
-                               )
-#else
-/* 6 instructions with rotate instruction, else 8 */
-#define REVERSE_FETCH32(a,l)   (                               \
-               l=*(const HASH_LONG *)(a),                      \
-               l=(((l>>8)&0x00FF00FF)|((l&0x00FF00FF)<<8)),    \
-               ROTATE(l,16)                                    \
-                               )
-/*
- * Originally the middle line started with l=(((l&0xFF00FF00)>>8)|...
- * It's rewritten as above for two reasons:
- *     - RISCs aren't good at long constants and have to explicitely
- *       compose 'em with several (well, usually 2) instructions in a
- *       register before performing the actual operation and (as you
- *       already realized:-) having same constant should inspire the
- *       compiler to permanently allocate the only register for it;
- *     - most modern CPUs have two ALUs, but usually only one has
- *       circuitry for shifts:-( this minor tweak inspires compiler
- *       to schedule shift instructions in a better way...
- *
- *                             <appro@fy.chalmers.se>
- */
-#endif
-#endif
-
 #ifndef ROTATE
 #define ROTATE(a,n)     (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
 #endif
 
-/*
- * Make some obvious choices. E.g., HASH_BLOCK_DATA_ORDER_ALIGNED
- * and HASH_BLOCK_HOST_ORDER ought to be the same if input data
- * and host are of the same "endianess". It's possible to mask
- * this with blank #define HASH_BLOCK_DATA_ORDER though...
- *
- *                             <appro@fy.chalmers.se>
- */
-#if defined(B_ENDIAN)
-#  if defined(DATA_ORDER_IS_BIG_ENDIAN)
-#    if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
-#      define HASH_BLOCK_DATA_ORDER_ALIGNED    HASH_BLOCK_HOST_ORDER
-#    endif
-#  endif
-#elif defined(L_ENDIAN)
-#  if defined(DATA_ORDER_IS_LITTLE_ENDIAN)
-#    if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
-#      define HASH_BLOCK_DATA_ORDER_ALIGNED    HASH_BLOCK_HOST_ORDER
-#    endif
-#  endif
-#endif
-
-#if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED)
-#ifndef HASH_BLOCK_DATA_ORDER
-#error "HASH_BLOCK_DATA_ORDER must be defined!"
-#endif
-#endif
-
 #if defined(DATA_ORDER_IS_BIG_ENDIAN)
 
 #ifndef PEDANTIC
                         l|=(((unsigned long)(*((c)++)))    ),          \
                         l)
 #endif
-#define HOST_p_c2l(c,l,n)      {                                       \
-                       switch (n) {                                    \
-                       case 0: l =((unsigned long)(*((c)++)))<<24;     \
-                       case 1: l|=((unsigned long)(*((c)++)))<<16;     \
-                       case 2: l|=((unsigned long)(*((c)++)))<< 8;     \
-                       case 3: l|=((unsigned long)(*((c)++)));         \
-                               } }
-#define HOST_p_c2l_p(c,l,sc,len) {                                     \
-                       switch (sc) {                                   \
-                       case 0: l =((unsigned long)(*((c)++)))<<24;     \
-                               if (--len == 0) break;                  \
-                       case 1: l|=((unsigned long)(*((c)++)))<<16;     \
-                               if (--len == 0) break;                  \
-                       case 2: l|=((unsigned long)(*((c)++)))<< 8;     \
-                               } }
-/* NOTE the pointer is not incremented at the end of this */
-#define HOST_c2l_p(c,l,n)      {                                       \
-                       l=0; (c)+=n;                                    \
-                       switch (n) {                                    \
-                       case 3: l =((unsigned long)(*(--(c))))<< 8;     \
-                       case 2: l|=((unsigned long)(*(--(c))))<<16;     \
-                       case 1: l|=((unsigned long)(*(--(c))))<<24;     \
-                               } }
 #ifndef HOST_l2c
 #define HOST_l2c(l,c)  (*((c)++)=(unsigned char)(((l)>>24)&0xff),      \
                         *((c)++)=(unsigned char)(((l)>>16)&0xff),      \
                         l|=(((unsigned long)(*((c)++)))<<24),          \
                         l)
 #endif
-#define HOST_p_c2l(c,l,n)      {                                       \
-                       switch (n) {                                    \
-                       case 0: l =((unsigned long)(*((c)++)));         \
-                       case 1: l|=((unsigned long)(*((c)++)))<< 8;     \
-                       case 2: l|=((unsigned long)(*((c)++)))<<16;     \
-                       case 3: l|=((unsigned long)(*((c)++)))<<24;     \
-                               } }
-#define HOST_p_c2l_p(c,l,sc,len) {                                     \
-                       switch (sc) {                                   \
-                       case 0: l =((unsigned long)(*((c)++)));         \
-                               if (--len == 0) break;                  \
-                       case 1: l|=((unsigned long)(*((c)++)))<< 8;     \
-                               if (--len == 0) break;                  \
-                       case 2: l|=((unsigned long)(*((c)++)))<<16;     \
-                               } }
-/* NOTE the pointer is not incremented at the end of this */
-#define HOST_c2l_p(c,l,n)      {                                       \
-                       l=0; (c)+=n;                                    \
-                       switch (n) {                                    \
-                       case 3: l =((unsigned long)(*(--(c))))<<16;     \
-                       case 2: l|=((unsigned long)(*(--(c))))<< 8;     \
-                       case 1: l|=((unsigned long)(*(--(c))));         \
-                               } }
 #ifndef HOST_l2c
 #define HOST_l2c(l,c)  (*((c)++)=(unsigned char)(((l)    )&0xff),      \
                         *((c)++)=(unsigned char)(((l)>> 8)&0xff),      \
 int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
        {
        const unsigned char *data=data_;
-       register HASH_LONG * p;
-       register HASH_LONG l;
-       size_t sw,sc,ew,ec;
+       unsigned char *p;
+       HASH_LONG l;
+       size_t n;
 
        if (len==0) return 1;
 
@@ -416,101 +273,43 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
        c->Nh+=(len>>29);       /* might cause compiler warning on 16-bit */
        c->Nl=l;
 
-       if (c->num != 0)
+       n = c->num;
+       if (n != 0)
                {
-               p=c->data;
-               sw=c->num>>2;
-               sc=c->num&0x03;
+               p=(unsigned char *)c->data;
 
-               if ((c->num+len) >= HASH_CBLOCK)
+               if ((n+len) >= HASH_CBLOCK)
                        {
-                       l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l;
-                       for (; sw<HASH_LBLOCK; sw++)
-                               {
-                               HOST_c2l(data,l); p[sw]=l;
-                               }
-                       HASH_BLOCK_HOST_ORDER (c,p,1);
-                       len-=(HASH_CBLOCK-c->num);
-                       c->num=0;
-                       /* drop through and do the rest */
+                       memcpy (p+n,data,HASH_CBLOCK-n);
+                       HASH_BLOCK_DATA_ORDER (c,p,1);
+                       n      = HASH_CBLOCK-n;
+                       data  += n;
+                       len   -= n;
+                       c->num = 0;
+                       memset (p,0,HASH_CBLOCK);       /* keep it zeroed */
                        }
                else
                        {
-                       c->num+=(unsigned int)len;
-                       if ((sc+len) < 4) /* ugly, add char's to a word */
-                               {
-                               l=p[sw]; HOST_p_c2l_p(data,l,sc,len); p[sw]=l;
-                               }
-                       else
-                               {
-                               ew=(c->num>>2);
-                               ec=(c->num&0x03);
-                               if (sc)
-                                       l=p[sw];
-                               HOST_p_c2l(data,l,sc);
-                               p[sw++]=l;
-                               for (; sw < ew; sw++)
-                                       {
-                                       HOST_c2l(data,l); p[sw]=l;
-                                       }
-                               if (ec)
-                                       {
-                                       HOST_c2l_p(data,l,ec); p[sw]=l;
-                                       }
-                               }
+                       memcpy (p+n,data,len);
+                       c->num += (unsigned int)len;
                        return 1;
                        }
                }
 
-       sw=len/HASH_CBLOCK;
-       if (sw > 0)
+       n = len/HASH_CBLOCK;
+       if (n > 0)
                {
-#if defined(HASH_BLOCK_DATA_ORDER_ALIGNED)
-               /*
-                * Note that HASH_BLOCK_DATA_ORDER_ALIGNED gets defined
-                * only if sizeof(HASH_LONG)==4.
-                */
-               if ((((size_t)data)%4) == 0)
-                       {
-                       /* data is properly aligned so that we can cast it: */
-                       HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,sw);
-                       sw*=HASH_CBLOCK;
-                       data+=sw;
-                       len-=sw;
-                       }
-               else
-#if !defined(HASH_BLOCK_DATA_ORDER)
-                       while (sw--)
-                               {
-                               memcpy (p=c->data,data,HASH_CBLOCK);
-                               HASH_BLOCK_DATA_ORDER_ALIGNED(c,p,1);
-                               data+=HASH_CBLOCK;
-                               len-=HASH_CBLOCK;
-                               }
-#endif
-#endif
-#if defined(HASH_BLOCK_DATA_ORDER)
-                       {
-                       HASH_BLOCK_DATA_ORDER(c,data,sw);
-                       sw*=HASH_CBLOCK;
-                       data+=sw;
-                       len-=sw;
-                       }
-#endif
+               HASH_BLOCK_DATA_ORDER (c,data,n);
+               n    *= HASH_CBLOCK;
+               data += n;
+               len  -= n;
                }
 
-       if (len!=0)
+       if (len != 0)
                {
-               p = c->data;
+               p = (unsigned char *)c->data;
                c->num = len;
-               ew=len>>2;      /* words to copy */
-               ec=len&0x03;
-               for (; ew; ew--,p++)
-                       {
-                       HOST_c2l(data,l); *p=l;
-                       }
-               HOST_c2l_p(data,l,ec);
-               *p=l;
+               memcpy (p,data,len);
                }
        return 1;
        }
@@ -518,73 +317,36 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
 
 void HASH_TRANSFORM (HASH_CTX *c, const unsigned char *data)
        {
-#if defined(HASH_BLOCK_DATA_ORDER_ALIGNED)
-       if ((((size_t)data)%4) == 0)
-               /* data is properly aligned so that we can cast it: */
-               HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,1);
-       else
-#if !defined(HASH_BLOCK_DATA_ORDER)
-               {
-               memcpy (c->data,data,HASH_CBLOCK);
-               HASH_BLOCK_DATA_ORDER_ALIGNED (c,c->data,1);
-               }
-#endif
-#endif
-#if defined(HASH_BLOCK_DATA_ORDER)
        HASH_BLOCK_DATA_ORDER (c,data,1);
-#endif
        }
 
 
 int HASH_FINAL (unsigned char *md, HASH_CTX *c)
        {
-       register HASH_LONG *p;
-       register unsigned long l;
-       register int i,j;
-       static const unsigned char end[4]={0x80,0x00,0x00,0x00};
-       const unsigned char *cp=end;
-
-       /* c->num should definitly have room for at least one more byte. */
-       p=c->data;
-       i=c->num>>2;
-       j=c->num&0x03;
-
-#if 0
-       /* purify often complains about the following line as an
-        * Uninitialized Memory Read.  While this can be true, the
-        * following p_c2l macro will reset l when that case is true.
-        * This is because j&0x03 contains the number of 'valid' bytes
-        * already in p[i].  If and only if j&0x03 == 0, the UMR will
-        * occur but this is also the only time p_c2l will do
-        * l= *(cp++) instead of l|= *(cp++)
-        * Many thanks to Alex Tang <altitude@cic.net> for pickup this
-        * 'potential bug' */
-#ifdef PURIFY
-       if (j==0) p[i]=0; /* Yeah, but that's not the way to fix it:-) */
-#endif
-       l=p[i];
-#else
-       l = (j==0) ? 0 : p[i];
-#endif
-       HOST_p_c2l(cp,l,j); p[i++]=l; /* i is the next 'undefined word' */
+       unsigned char *p = (unsigned char *)c->data;
+       size_t n = c->num;
 
-       if (i>(HASH_LBLOCK-2)) /* save room for Nl and Nh */
+       p[n] = 0x80; /* there is always room for one */
+       n++;
+
+       if (n > (HASH_CBLOCK-8))
                {
-               if (i<HASH_LBLOCK) p[i]=0;
-               HASH_BLOCK_HOST_ORDER (c,p,1);
-               i=0;
+               HASH_BLOCK_DATA_ORDER (c,p,1);
+               memset (p,0,HASH_CBLOCK);
                }
-       for (; i<(HASH_LBLOCK-2); i++)
-               p[i]=0;
 
+       p += HASH_CBLOCK-8;
 #if   defined(DATA_ORDER_IS_BIG_ENDIAN)
-       p[HASH_LBLOCK-2]=c->Nh;
-       p[HASH_LBLOCK-1]=c->Nl;
+       (void)HOST_l2c(c->Nh,p);
+       (void)HOST_l2c(c->Nl,p);
 #elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
-       p[HASH_LBLOCK-2]=c->Nl;
-       p[HASH_LBLOCK-1]=c->Nh;
+       (void)HOST_l2c(c->Nl,p);
+       (void)HOST_l2c(c->Nh,p);
 #endif
-       HASH_BLOCK_HOST_ORDER (c,p,1);
+       p -= HASH_CBLOCK;
+       HASH_BLOCK_DATA_ORDER (c,p,1);
+       c->num=0;
+       memset (p,0,HASH_CBLOCK);
 
 #ifndef HASH_MAKE_STRING
 #error "HASH_MAKE_STRING must be defined!"
@@ -592,11 +354,6 @@ int HASH_FINAL (unsigned char *md, HASH_CTX *c)
        HASH_MAKE_STRING(c,md);
 #endif
 
-       c->num=0;
-       /* clear stuff, HASH_BLOCK may be leaving some stuff on the stack
-        * but I'm not worried :-)
-       OPENSSL_cleanse((void *)c,sizeof(HASH_CTX));
-        */
        return 1;
        }
 
index d4c7057..e5b08f3 100644 (file)
@@ -72,89 +72,14 @@ const char *MD4_version="MD4" OPENSSL_VERSION_PTEXT;
 
 int MD4_Init(MD4_CTX *c)
        {
+       memset (c,0,sizeof(*c));
        c->A=INIT_DATA_A;
        c->B=INIT_DATA_B;
        c->C=INIT_DATA_C;
        c->D=INIT_DATA_D;
-       c->Nl=0;
-       c->Nh=0;
-       c->num=0;
        return 1;
        }
 
-#ifndef md4_block_host_order
-void md4_block_host_order (MD4_CTX *c, const void *data, size_t num)
-       {
-       const MD4_LONG *X=data;
-       register unsigned MD32_REG_T A,B,C,D;
-
-       A=c->A;
-       B=c->B;
-       C=c->C;
-       D=c->D;
-
-       for (;num--;X+=HASH_LBLOCK)
-               {
-       /* Round 0 */
-       R0(A,B,C,D,X[ 0], 3,0);
-       R0(D,A,B,C,X[ 1], 7,0);
-       R0(C,D,A,B,X[ 2],11,0);
-       R0(B,C,D,A,X[ 3],19,0);
-       R0(A,B,C,D,X[ 4], 3,0);
-       R0(D,A,B,C,X[ 5], 7,0);
-       R0(C,D,A,B,X[ 6],11,0);
-       R0(B,C,D,A,X[ 7],19,0);
-       R0(A,B,C,D,X[ 8], 3,0);
-       R0(D,A,B,C,X[ 9], 7,0);
-       R0(C,D,A,B,X[10],11,0);
-       R0(B,C,D,A,X[11],19,0);
-       R0(A,B,C,D,X[12], 3,0);
-       R0(D,A,B,C,X[13], 7,0);
-       R0(C,D,A,B,X[14],11,0);
-       R0(B,C,D,A,X[15],19,0);
-       /* Round 1 */
-       R1(A,B,C,D,X[ 0], 3,0x5A827999L);
-       R1(D,A,B,C,X[ 4], 5,0x5A827999L);
-       R1(C,D,A,B,X[ 8], 9,0x5A827999L);
-       R1(B,C,D,A,X[12],13,0x5A827999L);
-       R1(A,B,C,D,X[ 1], 3,0x5A827999L);
-       R1(D,A,B,C,X[ 5], 5,0x5A827999L);
-       R1(C,D,A,B,X[ 9], 9,0x5A827999L);
-       R1(B,C,D,A,X[13],13,0x5A827999L);
-       R1(A,B,C,D,X[ 2], 3,0x5A827999L);
-       R1(D,A,B,C,X[ 6], 5,0x5A827999L);
-       R1(C,D,A,B,X[10], 9,0x5A827999L);
-       R1(B,C,D,A,X[14],13,0x5A827999L);
-       R1(A,B,C,D,X[ 3], 3,0x5A827999L);
-       R1(D,A,B,C,X[ 7], 5,0x5A827999L);
-       R1(C,D,A,B,X[11], 9,0x5A827999L);
-       R1(B,C,D,A,X[15],13,0x5A827999L);
-       /* Round 2 */
-       R2(A,B,C,D,X[ 0], 3,0x6ED9EBA1);
-       R2(D,A,B,C,X[ 8], 9,0x6ED9EBA1);
-       R2(C,D,A,B,X[ 4],11,0x6ED9EBA1);
-       R2(B,C,D,A,X[12],15,0x6ED9EBA1);
-       R2(A,B,C,D,X[ 2], 3,0x6ED9EBA1);
-       R2(D,A,B,C,X[10], 9,0x6ED9EBA1);
-       R2(C,D,A,B,X[ 6],11,0x6ED9EBA1);
-       R2(B,C,D,A,X[14],15,0x6ED9EBA1);
-       R2(A,B,C,D,X[ 1], 3,0x6ED9EBA1);
-       R2(D,A,B,C,X[ 9], 9,0x6ED9EBA1);
-       R2(C,D,A,B,X[ 5],11,0x6ED9EBA1);
-       R2(B,C,D,A,X[13],15,0x6ED9EBA1);
-       R2(A,B,C,D,X[ 3], 3,0x6ED9EBA1);
-       R2(D,A,B,C,X[11], 9,0x6ED9EBA1);
-       R2(C,D,A,B,X[ 7],11,0x6ED9EBA1);
-       R2(B,C,D,A,X[15],15,0x6ED9EBA1);
-
-       A = c->A += A;
-       B = c->B += B;
-       C = c->C += C;
-       D = c->D += D;
-               }
-       }
-#endif
-
 #ifndef md4_block_data_order
 #ifdef X
 #undef X
@@ -240,19 +165,3 @@ void md4_block_data_order (MD4_CTX *c, const void *data_, size_t num)
                }
        }
 #endif
-
-#ifdef undef
-int printit(unsigned long *l)
-       {
-       int i,ii;
-
-       for (i=0; i<2; i++)
-               {
-               for (ii=0; ii<8; ii++)
-                       {
-                       fprintf(stderr,"%08lx ",l[i*8+ii]);
-                       }
-               fprintf(stderr,"\n");
-               }
-       }
-#endif
index 625b587..c8085b0 100644 (file)
 #define MD4_LONG_LOG2 2 /* default to 32 bits */
 #endif
 
-void md4_block_host_order (MD4_CTX *c, const void *p,size_t num);
 void md4_block_data_order (MD4_CTX *c, const void *p,size_t num);
 
-#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || \
-    defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
-# if !defined(B_ENDIAN)
-/*
- * *_block_host_order is expected to handle aligned data while
- * *_block_data_order - unaligned. As algorithm and host (x86)
- * are in this case of the same "endianness" these two are
- * otherwise indistinguishable. But normally you don't want to
- * call the same function because unaligned access in places
- * where alignment is expected is usually a "Bad Thing". Indeed,
- * on RISCs you get punished with BUS ERROR signal or *severe*
- * performance degradation. Intel CPUs are in turn perfectly
- * capable of loading unaligned data without such drastic side
- * effect. Yes, they say it's slower than aligned load, but no
- * exception is generated and therefore performance degradation
- * is *incomparable* with RISCs. What we should weight here is
- * costs of unaligned access against costs of aligning data.
- * According to my measurements allowing unaligned access results
- * in ~9% performance improvement on Pentium II operating at
- * 266MHz. I won't be surprised if the difference will be higher
- * on faster systems:-)
- *
- *                             <appro@fy.chalmers.se>
- */
-# define md4_block_data_order md4_block_host_order
-# endif
-#endif
-
 #define DATA_ORDER_IS_LITTLE_ENDIAN
 
 #define HASH_LONG              MD4_LONG
-#define HASH_LONG_LOG2         MD4_LONG_LOG2
 #define HASH_CTX               MD4_CTX
 #define HASH_CBLOCK            MD4_CBLOCK
-#define HASH_LBLOCK            MD4_LBLOCK
 #define HASH_UPDATE            MD4_Update
 #define HASH_TRANSFORM         MD4_Transform
 #define HASH_FINAL             MD4_Final
@@ -113,21 +82,7 @@ void md4_block_data_order (MD4_CTX *c, const void *p,size_t num);
        ll=(c)->C; HOST_l2c(ll,(s));    \
        ll=(c)->D; HOST_l2c(ll,(s));    \
        } while (0)
-#define HASH_BLOCK_HOST_ORDER  md4_block_host_order
-#if !defined(L_ENDIAN) || defined(md4_block_data_order)
 #define        HASH_BLOCK_DATA_ORDER   md4_block_data_order
-/*
- * Little-endians (Intel and Alpha) feel better without this.
- * It looks like memcpy does better job than generic
- * md4_block_data_order on copying-n-aligning input data.
- * But frankly speaking I didn't expect such result on Alpha.
- * On the other hand I've got this with egcs-1.0.2 and if
- * program is compiled with another (better?) compiler it
- * might turn out other way around.
- *
- *                             <appro@fy.chalmers.se>
- */
-#endif
 
 #include "md32_common.h"
 
index fa3fa3b..76ac235 100644 (file)
@@ -29,7 +29,7 @@ $X="esi";
  0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, # R3
  );
 
-&md5_block("md5_block_asm_host_order");
+&md5_block("md5_block_asm_data_order");
 &asm_finish();
 
 sub Np
index 75b4085..4330edd 100755 (executable)
@@ -115,9 +115,9 @@ $code .= <<EOF;
 .text
 .align 16
 
-.globl md5_block_asm_host_order
-.type md5_block_asm_host_order,\@function,3
-md5_block_asm_host_order:
+.globl md5_block_asm_data_order
+.type md5_block_asm_data_order,\@function,3
+md5_block_asm_data_order:
        push    %rbp
        push    %rbx
        push    %r12
@@ -243,7 +243,7 @@ $code .= <<EOF;
        pop     %rbx
        pop     %rbp
        ret
-.size md5_block_asm_host_order,.-md5_block_asm_host_order
+.size md5_block_asm_data_order,.-md5_block_asm_data_order
 EOF
 
 print $code;
index f97f48e..2708e16 100644 (file)
@@ -72,106 +72,14 @@ const char *MD5_version="MD5" OPENSSL_VERSION_PTEXT;
 
 int MD5_Init(MD5_CTX *c)
        {
+       memset (c,0,sizeof(*c));
        c->A=INIT_DATA_A;
        c->B=INIT_DATA_B;
        c->C=INIT_DATA_C;
        c->D=INIT_DATA_D;
-       c->Nl=0;
-       c->Nh=0;
-       c->num=0;
        return 1;
        }
 
-#ifndef md5_block_host_order
-void md5_block_host_order (MD5_CTX *c, const void *data, size_t num)
-       {
-       const MD5_LONG *X=data;
-       register unsigned MD32_REG_T A,B,C,D;
-
-       A=c->A;
-       B=c->B;
-       C=c->C;
-       D=c->D;
-
-       for (;num--;X+=HASH_LBLOCK)
-               {
-       /* Round 0 */
-       R0(A,B,C,D,X[ 0], 7,0xd76aa478L);
-       R0(D,A,B,C,X[ 1],12,0xe8c7b756L);
-       R0(C,D,A,B,X[ 2],17,0x242070dbL);
-       R0(B,C,D,A,X[ 3],22,0xc1bdceeeL);
-       R0(A,B,C,D,X[ 4], 7,0xf57c0fafL);
-       R0(D,A,B,C,X[ 5],12,0x4787c62aL);
-       R0(C,D,A,B,X[ 6],17,0xa8304613L);
-       R0(B,C,D,A,X[ 7],22,0xfd469501L);
-       R0(A,B,C,D,X[ 8], 7,0x698098d8L);
-       R0(D,A,B,C,X[ 9],12,0x8b44f7afL);
-       R0(C,D,A,B,X[10],17,0xffff5bb1L);
-       R0(B,C,D,A,X[11],22,0x895cd7beL);
-       R0(A,B,C,D,X[12], 7,0x6b901122L);
-       R0(D,A,B,C,X[13],12,0xfd987193L);
-       R0(C,D,A,B,X[14],17,0xa679438eL);
-       R0(B,C,D,A,X[15],22,0x49b40821L);
-       /* Round 1 */
-       R1(A,B,C,D,X[ 1], 5,0xf61e2562L);
-       R1(D,A,B,C,X[ 6], 9,0xc040b340L);
-       R1(C,D,A,B,X[11],14,0x265e5a51L);
-       R1(B,C,D,A,X[ 0],20,0xe9b6c7aaL);
-       R1(A,B,C,D,X[ 5], 5,0xd62f105dL);
-       R1(D,A,B,C,X[10], 9,0x02441453L);
-       R1(C,D,A,B,X[15],14,0xd8a1e681L);
-       R1(B,C,D,A,X[ 4],20,0xe7d3fbc8L);
-       R1(A,B,C,D,X[ 9], 5,0x21e1cde6L);
-       R1(D,A,B,C,X[14], 9,0xc33707d6L);
-       R1(C,D,A,B,X[ 3],14,0xf4d50d87L);
-       R1(B,C,D,A,X[ 8],20,0x455a14edL);
-       R1(A,B,C,D,X[13], 5,0xa9e3e905L);
-       R1(D,A,B,C,X[ 2], 9,0xfcefa3f8L);
-       R1(C,D,A,B,X[ 7],14,0x676f02d9L);
-       R1(B,C,D,A,X[12],20,0x8d2a4c8aL);
-       /* Round 2 */
-       R2(A,B,C,D,X[ 5], 4,0xfffa3942L);
-       R2(D,A,B,C,X[ 8],11,0x8771f681L);
-       R2(C,D,A,B,X[11],16,0x6d9d6122L);
-       R2(B,C,D,A,X[14],23,0xfde5380cL);
-       R2(A,B,C,D,X[ 1], 4,0xa4beea44L);
-       R2(D,A,B,C,X[ 4],11,0x4bdecfa9L);
-       R2(C,D,A,B,X[ 7],16,0xf6bb4b60L);
-       R2(B,C,D,A,X[10],23,0xbebfbc70L);
-       R2(A,B,C,D,X[13], 4,0x289b7ec6L);
-       R2(D,A,B,C,X[ 0],11,0xeaa127faL);
-       R2(C,D,A,B,X[ 3],16,0xd4ef3085L);
-       R2(B,C,D,A,X[ 6],23,0x04881d05L);
-       R2(A,B,C,D,X[ 9], 4,0xd9d4d039L);
-       R2(D,A,B,C,X[12],11,0xe6db99e5L);
-       R2(C,D,A,B,X[15],16,0x1fa27cf8L);
-       R2(B,C,D,A,X[ 2],23,0xc4ac5665L);
-       /* Round 3 */
-       R3(A,B,C,D,X[ 0], 6,0xf4292244L);
-       R3(D,A,B,C,X[ 7],10,0x432aff97L);
-       R3(C,D,A,B,X[14],15,0xab9423a7L);
-       R3(B,C,D,A,X[ 5],21,0xfc93a039L);
-       R3(A,B,C,D,X[12], 6,0x655b59c3L);
-       R3(D,A,B,C,X[ 3],10,0x8f0ccc92L);
-       R3(C,D,A,B,X[10],15,0xffeff47dL);
-       R3(B,C,D,A,X[ 1],21,0x85845dd1L);
-       R3(A,B,C,D,X[ 8], 6,0x6fa87e4fL);
-       R3(D,A,B,C,X[15],10,0xfe2ce6e0L);
-       R3(C,D,A,B,X[ 6],15,0xa3014314L);
-       R3(B,C,D,A,X[13],21,0x4e0811a1L);
-       R3(A,B,C,D,X[ 4], 6,0xf7537e82L);
-       R3(D,A,B,C,X[11],10,0xbd3af235L);
-       R3(C,D,A,B,X[ 2],15,0x2ad7d2bbL);
-       R3(B,C,D,A,X[ 9],21,0xeb86d391L);
-
-       A = c->A += A;
-       B = c->B += B;
-       C = c->C += C;
-       D = c->D += D;
-               }
-       }
-#endif
-
 #ifndef md5_block_data_order
 #ifdef X
 #undef X
@@ -274,19 +182,3 @@ void md5_block_data_order (MD5_CTX *c, const void *data_, size_t num)
                }
        }
 #endif
-
-#ifdef undef
-int printit(unsigned long *l)
-       {
-       int i,ii;
-
-       for (i=0; i<2; i++)
-               {
-               for (ii=0; ii<8; ii++)
-                       {
-                       fprintf(stderr,"%08lx ",l[i*8+ii]);
-                       }
-               fprintf(stderr,"\n");
-               }
-       }
-#endif
index d1375dd..968d577 100644 (file)
 #ifdef MD5_ASM
 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || \
      defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
-#  define md5_block_host_order md5_block_asm_host_order
-# elif defined(__sparc) && defined(OPENSSL_SYS_ULTRASPARC)
-   void md5_block_asm_data_order_aligned (MD5_CTX *c, const MD5_LONG *p,size_t num);
-#  define HASH_BLOCK_DATA_ORDER_ALIGNED md5_block_asm_data_order_aligned
+#  define md5_block_data_order md5_block_asm_data_order
 # elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
-#  define md5_block_host_order md5_block_asm_host_order
 #  define md5_block_data_order md5_block_asm_data_order
 # endif
 #endif
 
-void md5_block_host_order (MD5_CTX *c, const void *p,size_t num);
 void md5_block_data_order (MD5_CTX *c, const void *p,size_t num);
 
-#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || \
-    defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
-# if !defined(B_ENDIAN)
-/*
- * *_block_host_order is expected to handle aligned data while
- * *_block_data_order - unaligned. As algorithm and host (x86)
- * are in this case of the same "endianness" these two are
- * otherwise indistinguishable. But normally you don't want to
- * call the same function because unaligned access in places
- * where alignment is expected is usually a "Bad Thing". Indeed,
- * on RISCs you get punished with BUS ERROR signal or *severe*
- * performance degradation. Intel CPUs are in turn perfectly
- * capable of loading unaligned data without such drastic side
- * effect. Yes, they say it's slower than aligned load, but no
- * exception is generated and therefore performance degradation
- * is *incomparable* with RISCs. What we should weight here is
- * costs of unaligned access against costs of aligning data.
- * According to my measurements allowing unaligned access results
- * in ~9% performance improvement on Pentium II operating at
- * 266MHz. I won't be surprised if the difference will be higher
- * on faster systems:-)
- *
- *                             <appro@fy.chalmers.se>
- */
-# define md5_block_data_order md5_block_host_order
-# endif
-#endif
-
 #define DATA_ORDER_IS_LITTLE_ENDIAN
 
 #define HASH_LONG              MD5_LONG
-#define HASH_LONG_LOG2         MD5_LONG_LOG2
 #define HASH_CTX               MD5_CTX
 #define HASH_CBLOCK            MD5_CBLOCK
-#define HASH_LBLOCK            MD5_LBLOCK
 #define HASH_UPDATE            MD5_Update
 #define HASH_TRANSFORM         MD5_Transform
 #define HASH_FINAL             MD5_Final
@@ -126,21 +91,7 @@ void md5_block_data_order (MD5_CTX *c, const void *p,size_t num);
        ll=(c)->C; HOST_l2c(ll,(s));    \
        ll=(c)->D; HOST_l2c(ll,(s));    \
        } while (0)
-#define HASH_BLOCK_HOST_ORDER  md5_block_host_order
-#if !defined(L_ENDIAN) || defined(md5_block_data_order)
 #define        HASH_BLOCK_DATA_ORDER   md5_block_data_order
-/*
- * Little-endians (Intel and Alpha) feel better without this.
- * It looks like memcpy does better job than generic
- * md5_block_data_order on copying-n-aligning input data.
- * But frankly speaking I didn't expect such result on Alpha.
- * On the other hand I've got this with egcs-1.0.2 and if
- * program is compiled with another (better?) compiler it
- * might turn out other way around.
- *
- *                             <appro@fy.chalmers.se>
- */
-#endif
 
 #include "md32_common.h"
 
index 0ab6f76..4f3c4c9 100644 (file)
@@ -1,7 +1,7 @@
 #!/usr/local/bin/perl
 
 # Normal is the
-# ripemd160_block_asm_host_order(RIPEMD160_CTX *c, ULONG *X,int blocks);
+# ripemd160_block_asm_data_order(RIPEMD160_CTX *c, ULONG *X,int blocks);
 
 $normal=0;
 
@@ -56,7 +56,7 @@ $KR3=0x7A6D76E9;
         8, 5,12, 9,12, 5,14, 6, 8,13, 6, 5,15,13,11,11,
        );
 
-&ripemd160_block("ripemd160_block_asm_host_order");
+&ripemd160_block("ripemd160_block_asm_data_order");
 &asm_finish();
 
 sub Xv
index 03a286d..b04d7bf 100644 (file)
@@ -71,218 +71,15 @@ const char *RMD160_version="RIPE-MD160" OPENSSL_VERSION_PTEXT;
 
 int RIPEMD160_Init(RIPEMD160_CTX *c)
        {
+       memset (c,0,sizeof(*c));
        c->A=RIPEMD160_A;
        c->B=RIPEMD160_B;
        c->C=RIPEMD160_C;
        c->D=RIPEMD160_D;
        c->E=RIPEMD160_E;
-       c->Nl=0;
-       c->Nh=0;
-       c->num=0;
        return 1;
        }
 
-#ifndef ripemd160_block_host_order
-#ifdef X
-#undef X
-#endif
-#define X(i)   XX[i]
-void ripemd160_block_host_order (RIPEMD160_CTX *ctx, const void *p, size_t num)
-       {
-       const RIPEMD160_LONG *XX=p;
-       register unsigned MD32_REG_T A,B,C,D,E;
-       register unsigned MD32_REG_T a,b,c,d,e;
-
-       for (;num--;XX+=HASH_LBLOCK)
-               {
-
-       A=ctx->A; B=ctx->B; C=ctx->C; D=ctx->D; E=ctx->E;
-
-       RIP1(A,B,C,D,E,WL00,SL00);
-       RIP1(E,A,B,C,D,WL01,SL01);
-       RIP1(D,E,A,B,C,WL02,SL02);
-       RIP1(C,D,E,A,B,WL03,SL03);
-       RIP1(B,C,D,E,A,WL04,SL04);
-       RIP1(A,B,C,D,E,WL05,SL05);
-       RIP1(E,A,B,C,D,WL06,SL06);
-       RIP1(D,E,A,B,C,WL07,SL07);
-       RIP1(C,D,E,A,B,WL08,SL08);
-       RIP1(B,C,D,E,A,WL09,SL09);
-       RIP1(A,B,C,D,E,WL10,SL10);
-       RIP1(E,A,B,C,D,WL11,SL11);
-       RIP1(D,E,A,B,C,WL12,SL12);
-       RIP1(C,D,E,A,B,WL13,SL13);
-       RIP1(B,C,D,E,A,WL14,SL14);
-       RIP1(A,B,C,D,E,WL15,SL15);
-
-       RIP2(E,A,B,C,D,WL16,SL16,KL1);
-       RIP2(D,E,A,B,C,WL17,SL17,KL1);
-       RIP2(C,D,E,A,B,WL18,SL18,KL1);
-       RIP2(B,C,D,E,A,WL19,SL19,KL1);
-       RIP2(A,B,C,D,E,WL20,SL20,KL1);
-       RIP2(E,A,B,C,D,WL21,SL21,KL1);
-       RIP2(D,E,A,B,C,WL22,SL22,KL1);
-       RIP2(C,D,E,A,B,WL23,SL23,KL1);
-       RIP2(B,C,D,E,A,WL24,SL24,KL1);
-       RIP2(A,B,C,D,E,WL25,SL25,KL1);
-       RIP2(E,A,B,C,D,WL26,SL26,KL1);
-       RIP2(D,E,A,B,C,WL27,SL27,KL1);
-       RIP2(C,D,E,A,B,WL28,SL28,KL1);
-       RIP2(B,C,D,E,A,WL29,SL29,KL1);
-       RIP2(A,B,C,D,E,WL30,SL30,KL1);
-       RIP2(E,A,B,C,D,WL31,SL31,KL1);
-
-       RIP3(D,E,A,B,C,WL32,SL32,KL2);
-       RIP3(C,D,E,A,B,WL33,SL33,KL2);
-       RIP3(B,C,D,E,A,WL34,SL34,KL2);
-       RIP3(A,B,C,D,E,WL35,SL35,KL2);
-       RIP3(E,A,B,C,D,WL36,SL36,KL2);
-       RIP3(D,E,A,B,C,WL37,SL37,KL2);
-       RIP3(C,D,E,A,B,WL38,SL38,KL2);
-       RIP3(B,C,D,E,A,WL39,SL39,KL2);
-       RIP3(A,B,C,D,E,WL40,SL40,KL2);
-       RIP3(E,A,B,C,D,WL41,SL41,KL2);
-       RIP3(D,E,A,B,C,WL42,SL42,KL2);
-       RIP3(C,D,E,A,B,WL43,SL43,KL2);
-       RIP3(B,C,D,E,A,WL44,SL44,KL2);
-       RIP3(A,B,C,D,E,WL45,SL45,KL2);
-       RIP3(E,A,B,C,D,WL46,SL46,KL2);
-       RIP3(D,E,A,B,C,WL47,SL47,KL2);
-
-       RIP4(C,D,E,A,B,WL48,SL48,KL3);
-       RIP4(B,C,D,E,A,WL49,SL49,KL3);
-       RIP4(A,B,C,D,E,WL50,SL50,KL3);
-       RIP4(E,A,B,C,D,WL51,SL51,KL3);
-       RIP4(D,E,A,B,C,WL52,SL52,KL3);
-       RIP4(C,D,E,A,B,WL53,SL53,KL3);
-       RIP4(B,C,D,E,A,WL54,SL54,KL3);
-       RIP4(A,B,C,D,E,WL55,SL55,KL3);
-       RIP4(E,A,B,C,D,WL56,SL56,KL3);
-       RIP4(D,E,A,B,C,WL57,SL57,KL3);
-       RIP4(C,D,E,A,B,WL58,SL58,KL3);
-       RIP4(B,C,D,E,A,WL59,SL59,KL3);
-       RIP4(A,B,C,D,E,WL60,SL60,KL3);
-       RIP4(E,A,B,C,D,WL61,SL61,KL3);
-       RIP4(D,E,A,B,C,WL62,SL62,KL3);
-       RIP4(C,D,E,A,B,WL63,SL63,KL3);
-
-       RIP5(B,C,D,E,A,WL64,SL64,KL4);
-       RIP5(A,B,C,D,E,WL65,SL65,KL4);
-       RIP5(E,A,B,C,D,WL66,SL66,KL4);
-       RIP5(D,E,A,B,C,WL67,SL67,KL4);
-       RIP5(C,D,E,A,B,WL68,SL68,KL4);
-       RIP5(B,C,D,E,A,WL69,SL69,KL4);
-       RIP5(A,B,C,D,E,WL70,SL70,KL4);
-       RIP5(E,A,B,C,D,WL71,SL71,KL4);
-       RIP5(D,E,A,B,C,WL72,SL72,KL4);
-       RIP5(C,D,E,A,B,WL73,SL73,KL4);
-       RIP5(B,C,D,E,A,WL74,SL74,KL4);
-       RIP5(A,B,C,D,E,WL75,SL75,KL4);
-       RIP5(E,A,B,C,D,WL76,SL76,KL4);
-       RIP5(D,E,A,B,C,WL77,SL77,KL4);
-       RIP5(C,D,E,A,B,WL78,SL78,KL4);
-       RIP5(B,C,D,E,A,WL79,SL79,KL4);
-
-       a=A; b=B; c=C; d=D; e=E;
-       /* Do other half */
-       A=ctx->A; B=ctx->B; C=ctx->C; D=ctx->D; E=ctx->E;
-
-       RIP5(A,B,C,D,E,WR00,SR00,KR0);
-       RIP5(E,A,B,C,D,WR01,SR01,KR0);
-       RIP5(D,E,A,B,C,WR02,SR02,KR0);
-       RIP5(C,D,E,A,B,WR03,SR03,KR0);
-       RIP5(B,C,D,E,A,WR04,SR04,KR0);
-       RIP5(A,B,C,D,E,WR05,SR05,KR0);
-       RIP5(E,A,B,C,D,WR06,SR06,KR0);
-       RIP5(D,E,A,B,C,WR07,SR07,KR0);
-       RIP5(C,D,E,A,B,WR08,SR08,KR0);
-       RIP5(B,C,D,E,A,WR09,SR09,KR0);
-       RIP5(A,B,C,D,E,WR10,SR10,KR0);
-       RIP5(E,A,B,C,D,WR11,SR11,KR0);
-       RIP5(D,E,A,B,C,WR12,SR12,KR0);
-       RIP5(C,D,E,A,B,WR13,SR13,KR0);
-       RIP5(B,C,D,E,A,WR14,SR14,KR0);
-       RIP5(A,B,C,D,E,WR15,SR15,KR0);
-
-       RIP4(E,A,B,C,D,WR16,SR16,KR1);
-       RIP4(D,E,A,B,C,WR17,SR17,KR1);
-       RIP4(C,D,E,A,B,WR18,SR18,KR1);
-       RIP4(B,C,D,E,A,WR19,SR19,KR1);
-       RIP4(A,B,C,D,E,WR20,SR20,KR1);
-       RIP4(E,A,B,C,D,WR21,SR21,KR1);
-       RIP4(D,E,A,B,C,WR22,SR22,KR1);
-       RIP4(C,D,E,A,B,WR23,SR23,KR1);
-       RIP4(B,C,D,E,A,WR24,SR24,KR1);
-       RIP4(A,B,C,D,E,WR25,SR25,KR1);
-       RIP4(E,A,B,C,D,WR26,SR26,KR1);
-       RIP4(D,E,A,B,C,WR27,SR27,KR1);
-       RIP4(C,D,E,A,B,WR28,SR28,KR1);
-       RIP4(B,C,D,E,A,WR29,SR29,KR1);
-       RIP4(A,B,C,D,E,WR30,SR30,KR1);
-       RIP4(E,A,B,C,D,WR31,SR31,KR1);
-
-       RIP3(D,E,A,B,C,WR32,SR32,KR2);
-       RIP3(C,D,E,A,B,WR33,SR33,KR2);
-       RIP3(B,C,D,E,A,WR34,SR34,KR2);
-       RIP3(A,B,C,D,E,WR35,SR35,KR2);
-       RIP3(E,A,B,C,D,WR36,SR36,KR2);
-       RIP3(D,E,A,B,C,WR37,SR37,KR2);
-       RIP3(C,D,E,A,B,WR38,SR38,KR2);
-       RIP3(B,C,D,E,A,WR39,SR39,KR2);
-       RIP3(A,B,C,D,E,WR40,SR40,KR2);
-       RIP3(E,A,B,C,D,WR41,SR41,KR2);
-       RIP3(D,E,A,B,C,WR42,SR42,KR2);
-       RIP3(C,D,E,A,B,WR43,SR43,KR2);
-       RIP3(B,C,D,E,A,WR44,SR44,KR2);
-       RIP3(A,B,C,D,E,WR45,SR45,KR2);
-       RIP3(E,A,B,C,D,WR46,SR46,KR2);
-       RIP3(D,E,A,B,C,WR47,SR47,KR2);
-
-       RIP2(C,D,E,A,B,WR48,SR48,KR3);
-       RIP2(B,C,D,E,A,WR49,SR49,KR3);
-       RIP2(A,B,C,D,E,WR50,SR50,KR3);
-       RIP2(E,A,B,C,D,WR51,SR51,KR3);
-       RIP2(D,E,A,B,C,WR52,SR52,KR3);
-       RIP2(C,D,E,A,B,WR53,SR53,KR3);
-       RIP2(B,C,D,E,A,WR54,SR54,KR3);
-       RIP2(A,B,C,D,E,WR55,SR55,KR3);
-       RIP2(E,A,B,C,D,WR56,SR56,KR3);
-       RIP2(D,E,A,B,C,WR57,SR57,KR3);
-       RIP2(C,D,E,A,B,WR58,SR58,KR3);
-       RIP2(B,C,D,E,A,WR59,SR59,KR3);
-       RIP2(A,B,C,D,E,WR60,SR60,KR3);
-       RIP2(E,A,B,C,D,WR61,SR61,KR3);
-       RIP2(D,E,A,B,C,WR62,SR62,KR3);
-       RIP2(C,D,E,A,B,WR63,SR63,KR3);
-
-       RIP1(B,C,D,E,A,WR64,SR64);
-       RIP1(A,B,C,D,E,WR65,SR65);
-       RIP1(E,A,B,C,D,WR66,SR66);
-       RIP1(D,E,A,B,C,WR67,SR67);
-       RIP1(C,D,E,A,B,WR68,SR68);
-       RIP1(B,C,D,E,A,WR69,SR69);
-       RIP1(A,B,C,D,E,WR70,SR70);
-       RIP1(E,A,B,C,D,WR71,SR71);
-       RIP1(D,E,A,B,C,WR72,SR72);
-       RIP1(C,D,E,A,B,WR73,SR73);
-       RIP1(B,C,D,E,A,WR74,SR74);
-       RIP1(A,B,C,D,E,WR75,SR75);
-       RIP1(E,A,B,C,D,WR76,SR76);
-       RIP1(D,E,A,B,C,WR77,SR77);
-       RIP1(C,D,E,A,B,WR78,SR78);
-       RIP1(B,C,D,E,A,WR79,SR79);
-
-       D     =ctx->B+c+D;
-       ctx->B=ctx->C+d+E;
-       ctx->C=ctx->D+e+A;
-       ctx->D=ctx->E+a+B;
-       ctx->E=ctx->A+b+C;
-       ctx->A=D;
-
-               }
-       }
-#endif
-
 #ifndef ripemd160_block_data_order
 #ifdef X
 #undef X
index bb66dfa..f14b346 100644 (file)
  */
 #ifdef RMD160_ASM
 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__)
-#  define ripemd160_block_host_order ripemd160_block_asm_host_order
+#  define ripemd160_block_data_order ripemd160_block_asm_data_order
 # endif
 #endif
 
-void ripemd160_block_host_order (RIPEMD160_CTX *c, const void *p,size_t num);
 void ripemd160_block_data_order (RIPEMD160_CTX *c, const void *p,size_t num);
 
-#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || \
-    defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
-# if !defined(B_ENDIAN)
-#  define ripemd160_block_data_order ripemd160_block_host_order
-# endif
-#endif
-
 #define DATA_ORDER_IS_LITTLE_ENDIAN
 
 #define HASH_LONG               RIPEMD160_LONG
-#define HASH_LONG_LOG2          RIPEMD160_LONG_LOG2
 #define HASH_CTX                RIPEMD160_CTX
 #define HASH_CBLOCK             RIPEMD160_CBLOCK
-#define HASH_LBLOCK             RIPEMD160_LBLOCK
 #define HASH_UPDATE             RIPEMD160_Update
 #define HASH_TRANSFORM          RIPEMD160_Transform
 #define HASH_FINAL              RIPEMD160_Final
-#define HASH_BLOCK_HOST_ORDER   ripemd160_block_host_order
 #define        HASH_MAKE_STRING(c,s)   do {    \
        unsigned long ll;               \
        ll=(c)->A; HOST_l2c(ll,(s));    \
@@ -105,9 +94,7 @@ void ripemd160_block_data_order (RIPEMD160_CTX *c, const void *p,size_t num);
        ll=(c)->D; HOST_l2c(ll,(s));    \
        ll=(c)->E; HOST_l2c(ll,(s));    \
        } while (0)
-#if !defined(L_ENDIAN) || defined(ripemd160_block_data_order)
 #define HASH_BLOCK_DATA_ORDER   ripemd160_block_data_order
-#endif
 
 #include "md32_common.h"
 
index bbc20da..f83e5af 100644 (file)
@@ -18,23 +18,23 @@ const char *SHA256_version="SHA-256" OPENSSL_VERSION_PTEXT;
 
 int SHA224_Init (SHA256_CTX *c)
        {
+       memset (c,0,sizeof(*c));
        c->h[0]=0xc1059ed8UL;   c->h[1]=0x367cd507UL;
        c->h[2]=0x3070dd17UL;   c->h[3]=0xf70e5939UL;
        c->h[4]=0xffc00b31UL;   c->h[5]=0x68581511UL;
        c->h[6]=0x64f98fa7UL;   c->h[7]=0xbefa4fa4UL;
-       c->Nl=0;        c->Nh=0;
-       c->num=0;       c->md_len=SHA224_DIGEST_LENGTH;
+       c->md_len=SHA224_DIGEST_LENGTH;
        return 1;
        }
 
 int SHA256_Init (SHA256_CTX *c)
        {
+       memset (c,0,sizeof(*c));
        c->h[0]=0x6a09e667UL;   c->h[1]=0xbb67ae85UL;
        c->h[2]=0x3c6ef372UL;   c->h[3]=0xa54ff53aUL;
        c->h[4]=0x510e527fUL;   c->h[5]=0x9b05688cUL;
        c->h[6]=0x1f83d9abUL;   c->h[7]=0x5be0cd19UL;
-       c->Nl=0;        c->Nh=0;
-       c->num=0;       c->md_len=SHA256_DIGEST_LENGTH;
+       c->md_len=SHA256_DIGEST_LENGTH;
        return 1;
        }
 
@@ -69,17 +69,11 @@ int SHA224_Update(SHA256_CTX *c, const void *data, size_t len)
 int SHA224_Final (unsigned char *md, SHA256_CTX *c)
 {   return SHA256_Final (md,c);   }
 
-#ifndef        SHA_LONG_LOG2
-#define        SHA_LONG_LOG2   2       /* default to 32 bits */
-#endif
-
 #define        DATA_ORDER_IS_BIG_ENDIAN
 
 #define        HASH_LONG               SHA_LONG
-#define        HASH_LONG_LOG2          SHA_LONG_LOG2
 #define        HASH_CTX                SHA256_CTX
 #define        HASH_CBLOCK             SHA_CBLOCK
-#define        HASH_LBLOCK             SHA_LBLOCK
 /*
  * Note that FIPS180-2 discusses "Truncation of the Hash Function Output."
  * default: case below covers for it. It's not clear however if it's
@@ -112,9 +106,7 @@ int SHA224_Final (unsigned char *md, SHA256_CTX *c)
 #define        HASH_UPDATE             SHA256_Update
 #define        HASH_TRANSFORM          SHA256_Transform
 #define        HASH_FINAL              SHA256_Final
-#define        HASH_BLOCK_HOST_ORDER   sha256_block_host_order
 #define        HASH_BLOCK_DATA_ORDER   sha256_block_data_order
-void sha256_block_host_order (SHA256_CTX *ctx, const void *in, size_t num);
 void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num);
 
 #include "md32_common.h"
@@ -158,7 +150,7 @@ static const SHA_LONG K256[64] = {
 static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
        {
        unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1,T2;
-       SHA_LONG        X[16];
+       SHA_LONG        X[16],l;
        int i;
        const unsigned char *data=in;
 
@@ -167,33 +159,13 @@ static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
        a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
        e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
 
-       if (host)
-               {
-               const SHA_LONG *W=(const SHA_LONG *)data;
-
-               for (i=0;i<16;i++)
-                       {
-                       T1 = X[i] = W[i];
-                       T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
-                       T2 = Sigma0(a) + Maj(a,b,c);
-                       h = g;  g = f;  f = e;  e = d + T1;
-                       d = c;  c = b;  b = a;  a = T1 + T2;
-                       }
-
-               data += SHA256_CBLOCK;
-               }
-       else
+       for (i=0;i<16;i++)
                {
-               SHA_LONG l;
-
-               for (i=0;i<16;i++)
-                       {
-                       HOST_c2l(data,l); T1 = X[i] = l;
-                       T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
-                       T2 = Sigma0(a) + Maj(a,b,c);
-                       h = g;  g = f;  f = e;  e = d + T1;
-                       d = c;  c = b;  b = a;  a = T1 + T2;
-                       }
+               HOST_c2l(data,l); T1 = X[i] = l;
+               T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
+               T2 = Sigma0(a) + Maj(a,b,c);
+               h = g;  g = f;  f = e;  e = d + T1;
+               d = c;  c = b;  b = a;  a = T1 + T2;
                }
 
        for (;i<64;i++)
@@ -233,13 +205,14 @@ static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
        SHA_LONG        X[16];
        int i;
        const unsigned char *data=in;
+       const union { long one; char little; } is_endian = {1};
 
                        while (num--) {
 
        a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
        e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
 
-       if (host)
+       if (!is_endian.little && sizeof(SHA_LONG)==4 && ((size_t)in%4)==0)
                {
                const SHA_LONG *W=(const SHA_LONG *)data;
 
@@ -305,14 +278,6 @@ static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
 #endif
 #endif /* SHA256_ASM */
 
-/*
- * Idea is to trade couple of cycles for some space. On IA-32 we save
- * about 4K in "big footprint" case. In "small footprint" case any gain
- * is appreciated:-)
- */
-void HASH_BLOCK_HOST_ORDER (SHA256_CTX *ctx, const void *in, size_t num)
-{   sha256_block (ctx,in,num,1);   }
-
 void HASH_BLOCK_DATA_ORDER (SHA256_CTX *ctx, const void *in, size_t num)
 {   sha256_block (ctx,in,num,0);   }
 
index 2f5ab1f..d482ba1 100644 (file)
 #include <openssl/opensslconf.h>
 #include <openssl/sha.h>
 
-#ifndef SHA_LONG_LOG2
-#define SHA_LONG_LOG2  2       /* default to 32 bits */
-#endif
-
 #define DATA_ORDER_IS_BIG_ENDIAN
 
 #define HASH_LONG               SHA_LONG
-#define HASH_LONG_LOG2          SHA_LONG_LOG2
 #define HASH_CTX                SHA_CTX
 #define HASH_CBLOCK             SHA_CBLOCK
-#define HASH_LBLOCK             SHA_LBLOCK
 #define HASH_MAKE_STRING(c,s)   do {   \
        unsigned long ll;               \
        ll=(c)->h0; HOST_l2c(ll,(s));   \
 # define HASH_TRANSFORM                SHA_Transform
 # define HASH_FINAL                    SHA_Final
 # define HASH_INIT                     SHA_Init
-# define HASH_BLOCK_HOST_ORDER         sha_block_host_order
 # define HASH_BLOCK_DATA_ORDER         sha_block_data_order
 # define Xupdate(a,ix,ia,ib,ic,id)     (ix=(a)=(ia^ib^ic^id))
 
-  void sha_block_host_order (SHA_CTX *c, const void *p,size_t num);
   void sha_block_data_order (SHA_CTX *c, const void *p,size_t num);
 
 #elif defined(SHA_1)
 # define HASH_TRANSFORM                SHA1_Transform
 # define HASH_FINAL                    SHA1_Final
 # define HASH_INIT                     SHA1_Init
-# define HASH_BLOCK_HOST_ORDER         sha1_block_host_order
 # define HASH_BLOCK_DATA_ORDER         sha1_block_data_order
 # if defined(__MWERKS__) && defined(__MC68K__)
    /* Metrowerks for Motorola fails otherwise:-( <appro@fy.chalmers.se> */
 # ifdef SHA1_ASM
 #  if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) \
    || defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
-#   define sha1_block_host_order               sha1_block_asm_host_order
-#   define DONT_IMPLEMENT_BLOCK_HOST_ORDER
 #   define sha1_block_data_order               sha1_block_asm_data_order
 #   define DONT_IMPLEMENT_BLOCK_DATA_ORDER
-#   define HASH_BLOCK_DATA_ORDER_ALIGNED       sha1_block_asm_data_order
 #  elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
-#   define sha1_block_host_order               sha1_block_asm_host_order
-#   define DONT_IMPLEMENT_BLOCK_HOST_ORDER
 #   define sha1_block_data_order               sha1_block_asm_data_order
 #   define DONT_IMPLEMENT_BLOCK_DATA_ORDER
 #  elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
        defined(__ppc) || defined(__ppc__) || defined(__powerpc) || \
        defined(__ppc64) || defined(__ppc64__) || defined(__powerpc64)
-#   define sha1_block_host_order               sha1_block_asm_data_order
-#   define DONT_IMPLEMENT_BLOCK_HOST_ORDER
 #   define sha1_block_data_order               sha1_block_asm_data_order
 #   define DONT_IMPLEMENT_BLOCK_DATA_ORDER
 #  endif
 # endif
-  void sha1_block_host_order (SHA_CTX *c, const void *p,size_t num);
   void sha1_block_data_order (SHA_CTX *c, const void *p,size_t num);
 
 #else
 
 int HASH_INIT (SHA_CTX *c)
        {
+       memset (c,0,sizeof(*c));
        c->h0=INIT_DATA_h0;
        c->h1=INIT_DATA_h1;
        c->h2=INIT_DATA_h2;
        c->h3=INIT_DATA_h3;
        c->h4=INIT_DATA_h4;
-       c->Nl=0;
-       c->Nh=0;
-       c->num=0;
        return 1;
        }
 
@@ -235,131 +216,6 @@ int HASH_INIT (SHA_CTX *c)
 # define X(i)  XX[i]
 #endif
 
-#ifndef DONT_IMPLEMENT_BLOCK_HOST_ORDER
-void HASH_BLOCK_HOST_ORDER (SHA_CTX *c, const void *d, size_t num)
-       {
-       const SHA_LONG *W=d;
-       register unsigned MD32_REG_T A,B,C,D,E,T;
-#ifndef MD32_XARRAY
-       unsigned MD32_REG_T     XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
-                               XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
-#else
-       SHA_LONG        XX[16];
-#endif
-
-       A=c->h0;
-       B=c->h1;
-       C=c->h2;
-       D=c->h3;
-       E=c->h4;
-
-       for (;;)
-               {
-       BODY_00_15( 0,A,B,C,D,E,T,W[ 0]);
-       BODY_00_15( 1,T,A,B,C,D,E,W[ 1]);
-       BODY_00_15( 2,E,T,A,B,C,D,W[ 2]);
-       BODY_00_15( 3,D,E,T,A,B,C,W[ 3]);
-       BODY_00_15( 4,C,D,E,T,A,B,W[ 4]);
-       BODY_00_15( 5,B,C,D,E,T,A,W[ 5]);
-       BODY_00_15( 6,A,B,C,D,E,T,W[ 6]);
-       BODY_00_15( 7,T,A,B,C,D,E,W[ 7]);
-       BODY_00_15( 8,E,T,A,B,C,D,W[ 8]);
-       BODY_00_15( 9,D,E,T,A,B,C,W[ 9]);
-       BODY_00_15(10,C,D,E,T,A,B,W[10]);
-       BODY_00_15(11,B,C,D,E,T,A,W[11]);
-       BODY_00_15(12,A,B,C,D,E,T,W[12]);
-       BODY_00_15(13,T,A,B,C,D,E,W[13]);
-       BODY_00_15(14,E,T,A,B,C,D,W[14]);
-       BODY_00_15(15,D,E,T,A,B,C,W[15]);
-
-       BODY_16_19(16,C,D,E,T,A,B,X( 0),W[ 0],W[ 2],W[ 8],W[13]);
-       BODY_16_19(17,B,C,D,E,T,A,X( 1),W[ 1],W[ 3],W[ 9],W[14]);
-       BODY_16_19(18,A,B,C,D,E,T,X( 2),W[ 2],W[ 4],W[10],W[15]);
-       BODY_16_19(19,T,A,B,C,D,E,X( 3),W[ 3],W[ 5],W[11],X( 0));
-
-       BODY_20_31(20,E,T,A,B,C,D,X( 4),W[ 4],W[ 6],W[12],X( 1));
-       BODY_20_31(21,D,E,T,A,B,C,X( 5),W[ 5],W[ 7],W[13],X( 2));
-       BODY_20_31(22,C,D,E,T,A,B,X( 6),W[ 6],W[ 8],W[14],X( 3));
-       BODY_20_31(23,B,C,D,E,T,A,X( 7),W[ 7],W[ 9],W[15],X( 4));
-       BODY_20_31(24,A,B,C,D,E,T,X( 8),W[ 8],W[10],X( 0),X( 5));
-       BODY_20_31(25,T,A,B,C,D,E,X( 9),W[ 9],W[11],X( 1),X( 6));
-       BODY_20_31(26,E,T,A,B,C,D,X(10),W[10],W[12],X( 2),X( 7));
-       BODY_20_31(27,D,E,T,A,B,C,X(11),W[11],W[13],X( 3),X( 8));
-       BODY_20_31(28,C,D,E,T,A,B,X(12),W[12],W[14],X( 4),X( 9));
-       BODY_20_31(29,B,C,D,E,T,A,X(13),W[13],W[15],X( 5),X(10));
-       BODY_20_31(30,A,B,C,D,E,T,X(14),W[14],X( 0),X( 6),X(11));
-       BODY_20_31(31,T,A,B,C,D,E,X(15),W[15],X( 1),X( 7),X(12));
-
-       BODY_32_39(32,E,T,A,B,C,D,X( 0),X( 2),X( 8),X(13));
-       BODY_32_39(33,D,E,T,A,B,C,X( 1),X( 3),X( 9),X(14));
-       BODY_32_39(34,C,D,E,T,A,B,X( 2),X( 4),X(10),X(15));
-       BODY_32_39(35,B,C,D,E,T,A,X( 3),X( 5),X(11),X( 0));
-       BODY_32_39(36,A,B,C,D,E,T,X( 4),X( 6),X(12),X( 1));
-       BODY_32_39(37,T,A,B,C,D,E,X( 5),X( 7),X(13),X( 2));
-       BODY_32_39(38,E,T,A,B,C,D,X( 6),X( 8),X(14),X( 3));
-       BODY_32_39(39,D,E,T,A,B,C,X( 7),X( 9),X(15),X( 4));
-
-       BODY_40_59(40,C,D,E,T,A,B,X( 8),X(10),X( 0),X( 5));
-       BODY_40_59(41,B,C,D,E,T,A,X( 9),X(11),X( 1),X( 6));
-       BODY_40_59(42,A,B,C,D,E,T,X(10),X(12),X( 2),X( 7));
-       BODY_40_59(43,T,A,B,C,D,E,X(11),X(13),X( 3),X( 8));
-       BODY_40_59(44,E,T,A,B,C,D,X(12),X(14),X( 4),X( 9));
-       BODY_40_59(45,D,E,T,A,B,C,X(13),X(15),X( 5),X(10));
-       BODY_40_59(46,C,D,E,T,A,B,X(14),X( 0),X( 6),X(11));
-       BODY_40_59(47,B,C,D,E,T,A,X(15),X( 1),X( 7),X(12));
-       BODY_40_59(48,A,B,C,D,E,T,X( 0),X( 2),X( 8),X(13));
-       BODY_40_59(49,T,A,B,C,D,E,X( 1),X( 3),X( 9),X(14));
-       BODY_40_59(50,E,T,A,B,C,D,X( 2),X( 4),X(10),X(15));
-       BODY_40_59(51,D,E,T,A,B,C,X( 3),X( 5),X(11),X( 0));
-       BODY_40_59(52,C,D,E,T,A,B,X( 4),X( 6),X(12),X( 1));
-       BODY_40_59(53,B,C,D,E,T,A,X( 5),X( 7),X(13),X( 2));
-       BODY_40_59(54,A,B,C,D,E,T,X( 6),X( 8),X(14),X( 3));
-       BODY_40_59(55,T,A,B,C,D,E,X( 7),X( 9),X(15),X( 4));
-       BODY_40_59(56,E,T,A,B,C,D,X( 8),X(10),X( 0),X( 5));
-       BODY_40_59(57,D,E,T,A,B,C,X( 9),X(11),X( 1),X( 6));
-       BODY_40_59(58,C,D,E,T,A,B,X(10),X(12),X( 2),X( 7));
-       BODY_40_59(59,B,C,D,E,T,A,X(11),X(13),X( 3),X( 8));
-
-       BODY_60_79(60,A,B,C,D,E,T,X(12),X(14),X( 4),X( 9));
-       BODY_60_79(61,T,A,B,C,D,E,X(13),X(15),X( 5),X(10));
-       BODY_60_79(62,E,T,A,B,C,D,X(14),X( 0),X( 6),X(11));
-       BODY_60_79(63,D,E,T,A,B,C,X(15),X( 1),X( 7),X(12));
-       BODY_60_79(64,C,D,E,T,A,B,X( 0),X( 2),X( 8),X(13));
-       BODY_60_79(65,B,C,D,E,T,A,X( 1),X( 3),X( 9),X(14));
-       BODY_60_79(66,A,B,C,D,E,T,X( 2),X( 4),X(10),X(15));
-       BODY_60_79(67,T,A,B,C,D,E,X( 3),X( 5),X(11),X( 0));
-       BODY_60_79(68,E,T,A,B,C,D,X( 4),X( 6),X(12),X( 1));
-       BODY_60_79(69,D,E,T,A,B,C,X( 5),X( 7),X(13),X( 2));
-       BODY_60_79(70,C,D,E,T,A,B,X( 6),X( 8),X(14),X( 3));
-       BODY_60_79(71,B,C,D,E,T,A,X( 7),X( 9),X(15),X( 4));
-       BODY_60_79(72,A,B,C,D,E,T,X( 8),X(10),X( 0),X( 5));
-       BODY_60_79(73,T,A,B,C,D,E,X( 9),X(11),X( 1),X( 6));
-       BODY_60_79(74,E,T,A,B,C,D,X(10),X(12),X( 2),X( 7));
-       BODY_60_79(75,D,E,T,A,B,C,X(11),X(13),X( 3),X( 8));
-       BODY_60_79(76,C,D,E,T,A,B,X(12),X(14),X( 4),X( 9));
-       BODY_60_79(77,B,C,D,E,T,A,X(13),X(15),X( 5),X(10));
-       BODY_60_79(78,A,B,C,D,E,T,X(14),X( 0),X( 6),X(11));
-       BODY_60_79(79,T,A,B,C,D,E,X(15),X( 1),X( 7),X(12));
-       
-       c->h0=(c->h0+E)&0xffffffffL; 
-       c->h1=(c->h1+T)&0xffffffffL;
-       c->h2=(c->h2+A)&0xffffffffL;
-       c->h3=(c->h3+B)&0xffffffffL;
-       c->h4=(c->h4+C)&0xffffffffL;
-
-       if (--num == 0) break;
-
-       A=c->h0;
-       B=c->h1;
-       C=c->h2;
-       D=c->h3;
-       E=c->h4;
-
-       W+=SHA_LBLOCK;
-               }
-       }
-#endif
-
 #ifndef DONT_IMPLEMENT_BLOCK_DATA_ORDER
 void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
        {
@@ -379,43 +235,86 @@ void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
        E=c->h4;
 
        for (;;)
-               {
+                       {
+       const union { long one; char little; } is_endian = {1};
 
-       HOST_c2l(data,l); X( 0)=l;              HOST_c2l(data,l); X( 1)=l;
-       BODY_00_15( 0,A,B,C,D,E,T,X( 0));       HOST_c2l(data,l); X( 2)=l;
-       BODY_00_15( 1,T,A,B,C,D,E,X( 1));       HOST_c2l(data,l); X( 3)=l;
-       BODY_00_15( 2,E,T,A,B,C,D,X( 2));       HOST_c2l(data,l); X( 4)=l;
-       BODY_00_15( 3,D,E,T,A,B,C,X( 3));       HOST_c2l(data,l); X( 5)=l;
-       BODY_00_15( 4,C,D,E,T,A,B,X( 4));       HOST_c2l(data,l); X( 6)=l;
-       BODY_00_15( 5,B,C,D,E,T,A,X( 5));       HOST_c2l(data,l); X( 7)=l;
-       BODY_00_15( 6,A,B,C,D,E,T,X( 6));       HOST_c2l(data,l); X( 8)=l;
-       BODY_00_15( 7,T,A,B,C,D,E,X( 7));       HOST_c2l(data,l); X( 9)=l;
-       BODY_00_15( 8,E,T,A,B,C,D,X( 8));       HOST_c2l(data,l); X(10)=l;
-       BODY_00_15( 9,D,E,T,A,B,C,X( 9));       HOST_c2l(data,l); X(11)=l;
-       BODY_00_15(10,C,D,E,T,A,B,X(10));       HOST_c2l(data,l); X(12)=l;
-       BODY_00_15(11,B,C,D,E,T,A,X(11));       HOST_c2l(data,l); X(13)=l;
-       BODY_00_15(12,A,B,C,D,E,T,X(12));       HOST_c2l(data,l); X(14)=l;
-       BODY_00_15(13,T,A,B,C,D,E,X(13));       HOST_c2l(data,l); X(15)=l;
-       BODY_00_15(14,E,T,A,B,C,D,X(14));
-       BODY_00_15(15,D,E,T,A,B,C,X(15));
-
-       BODY_16_19(16,C,D,E,T,A,B,X( 0),X( 0),X( 2),X( 8),X(13));
-       BODY_16_19(17,B,C,D,E,T,A,X( 1),X( 1),X( 3),X( 9),X(14));
-       BODY_16_19(18,A,B,C,D,E,T,X( 2),X( 2),X( 4),X(10),X(15));
-       BODY_16_19(19,T,A,B,C,D,E,X( 3),X( 3),X( 5),X(11),X( 0));
-
-       BODY_20_31(20,E,T,A,B,C,D,X( 4),X( 4),X( 6),X(12),X( 1));
-       BODY_20_31(21,D,E,T,A,B,C,X( 5),X( 5),X( 7),X(13),X( 2));
-       BODY_20_31(22,C,D,E,T,A,B,X( 6),X( 6),X( 8),X(14),X( 3));
-       BODY_20_31(23,B,C,D,E,T,A,X( 7),X( 7),X( 9),X(15),X( 4));
-       BODY_20_31(24,A,B,C,D,E,T,X( 8),X( 8),X(10),X( 0),X( 5));
-       BODY_20_31(25,T,A,B,C,D,E,X( 9),X( 9),X(11),X( 1),X( 6));
-       BODY_20_31(26,E,T,A,B,C,D,X(10),X(10),X(12),X( 2),X( 7));
-       BODY_20_31(27,D,E,T,A,B,C,X(11),X(11),X(13),X( 3),X( 8));
-       BODY_20_31(28,C,D,E,T,A,B,X(12),X(12),X(14),X( 4),X( 9));
-       BODY_20_31(29,B,C,D,E,T,A,X(13),X(13),X(15),X( 5),X(10));
-       BODY_20_31(30,A,B,C,D,E,T,X(14),X(14),X( 0),X( 6),X(11));
-       BODY_20_31(31,T,A,B,C,D,E,X(15),X(15),X( 1),X( 7),X(12));
+       if (!is_endian.little && sizeof(SHA_LONG)==4 && ((size_t)p%4)==0)
+               {
+               const SHA_LONG *W=p;
+
+               BODY_00_15( 0,A,B,C,D,E,T,W[ 0]);
+               BODY_00_15( 1,T,A,B,C,D,E,W[ 1]);
+               BODY_00_15( 2,E,T,A,B,C,D,W[ 2]);
+               BODY_00_15( 3,D,E,T,A,B,C,W[ 3]);
+               BODY_00_15( 4,C,D,E,T,A,B,W[ 4]);
+               BODY_00_15( 5,B,C,D,E,T,A,W[ 5]);
+               BODY_00_15( 6,A,B,C,D,E,T,W[ 6]);
+               BODY_00_15( 7,T,A,B,C,D,E,W[ 7]);
+               BODY_00_15( 8,E,T,A,B,C,D,W[ 8]);
+               BODY_00_15( 9,D,E,T,A,B,C,W[ 9]);
+               BODY_00_15(10,C,D,E,T,A,B,W[10]);
+               BODY_00_15(11,B,C,D,E,T,A,W[11]);
+               BODY_00_15(12,A,B,C,D,E,T,W[12]);
+               BODY_00_15(13,T,A,B,C,D,E,W[13]);
+               BODY_00_15(14,E,T,A,B,C,D,W[14]);
+               BODY_00_15(15,D,E,T,A,B,C,W[15]);
+
+               BODY_16_19(16,C,D,E,T,A,B,X( 0),W[ 0],W[ 2],W[ 8],W[13]);
+               BODY_16_19(17,B,C,D,E,T,A,X( 1),W[ 1],W[ 3],W[ 9],W[14]);
+               BODY_16_19(18,A,B,C,D,E,T,X( 2),W[ 2],W[ 4],W[10],W[15]);
+               BODY_16_19(19,T,A,B,C,D,E,X( 3),W[ 3],W[ 5],W[11],X( 0));
+
+               BODY_20_31(20,E,T,A,B,C,D,X( 4),W[ 4],W[ 6],W[12],X( 1));
+               BODY_20_31(21,D,E,T,A,B,C,X( 5),W[ 5],W[ 7],W[13],X( 2));
+               BODY_20_31(22,C,D,E,T,A,B,X( 6),W[ 6],W[ 8],W[14],X( 3));
+               BODY_20_31(23,B,C,D,E,T,A,X( 7),W[ 7],W[ 9],W[15],X( 4));
+               BODY_20_31(24,A,B,C,D,E,T,X( 8),W[ 8],W[10],X( 0),X( 5));
+               BODY_20_31(25,T,A,B,C,D,E,X( 9),W[ 9],W[11],X( 1),X( 6));
+               BODY_20_31(26,E,T,A,B,C,D,X(10),W[10],W[12],X( 2),X( 7));
+               BODY_20_31(27,D,E,T,A,B,C,X(11),W[11],W[13],X( 3),X( 8));
+               BODY_20_31(28,C,D,E,T,A,B,X(12),W[12],W[14],X( 4),X( 9));
+               BODY_20_31(29,B,C,D,E,T,A,X(13),W[13],W[15],X( 5),X(10));
+               BODY_20_31(30,A,B,C,D,E,T,X(14),W[14],X( 0),X( 6),X(11));
+               BODY_20_31(31,T,A,B,C,D,E,X(15),W[15],X( 1),X( 7),X(12));
+               }
+       else
+               {
+               HOST_c2l(data,l); X( 0)=l;              HOST_c2l(data,l); X( 1)=l;
+               BODY_00_15( 0,A,B,C,D,E,T,X( 0));       HOST_c2l(data,l); X( 2)=l;
+               BODY_00_15( 1,T,A,B,C,D,E,X( 1));       HOST_c2l(data,l); X( 3)=l;
+               BODY_00_15( 2,E,T,A,B,C,D,X( 2));       HOST_c2l(data,l); X( 4)=l;
+               BODY_00_15( 3,D,E,T,A,B,C,X( 3));       HOST_c2l(data,l); X( 5)=l;
+               BODY_00_15( 4,C,D,E,T,A,B,X( 4));       HOST_c2l(data,l); X( 6)=l;
+               BODY_00_15( 5,B,C,D,E,T,A,X( 5));       HOST_c2l(data,l); X( 7)=l;
+               BODY_00_15( 6,A,B,C,D,E,T,X( 6));       HOST_c2l(data,l); X( 8)=l;
+               BODY_00_15( 7,T,A,B,C,D,E,X( 7));       HOST_c2l(data,l); X( 9)=l;
+               BODY_00_15( 8,E,T,A,B,C,D,X( 8));       HOST_c2l(data,l); X(10)=l;
+               BODY_00_15( 9,D,E,T,A,B,C,X( 9));       HOST_c2l(data,l); X(11)=l;
+               BODY_00_15(10,C,D,E,T,A,B,X(10));       HOST_c2l(data,l); X(12)=l;
+               BODY_00_15(11,B,C,D,E,T,A,X(11));       HOST_c2l(data,l); X(13)=l;
+               BODY_00_15(12,A,B,C,D,E,T,X(12));       HOST_c2l(data,l); X(14)=l;
+               BODY_00_15(13,T,A,B,C,D,E,X(13));       HOST_c2l(data,l); X(15)=l;
+               BODY_00_15(14,E,T,A,B,C,D,X(14));
+               BODY_00_15(15,D,E,T,A,B,C,X(15));
+
+               BODY_16_19(16,C,D,E,T,A,B,X( 0),X( 0),X( 2),X( 8),X(13));
+               BODY_16_19(17,B,C,D,E,T,A,X( 1),X( 1),X( 3),X( 9),X(14));
+               BODY_16_19(18,A,B,C,D,E,T,X( 2),X( 2),X( 4),X(10),X(15));
+               BODY_16_19(19,T,A,B,C,D,E,X( 3),X( 3),X( 5),X(11),X( 0));
+
+               BODY_20_31(20,E,T,A,B,C,D,X( 4),X( 4),X( 6),X(12),X( 1));
+               BODY_20_31(21,D,E,T,A,B,C,X( 5),X( 5),X( 7),X(13),X( 2));
+               BODY_20_31(22,C,D,E,T,A,B,X( 6),X( 6),X( 8),X(14),X( 3));
+               BODY_20_31(23,B,C,D,E,T,A,X( 7),X( 7),X( 9),X(15),X( 4));
+               BODY_20_31(24,A,B,C,D,E,T,X( 8),X( 8),X(10),X( 0),X( 5));
+               BODY_20_31(25,T,A,B,C,D,E,X( 9),X( 9),X(11),X( 1),X( 6));
+               BODY_20_31(26,E,T,A,B,C,D,X(10),X(10),X(12),X( 2),X( 7));
+               BODY_20_31(27,D,E,T,A,B,C,X(11),X(11),X(13),X( 3),X( 8));
+               BODY_20_31(28,C,D,E,T,A,B,X(12),X(12),X(14),X( 4),X( 9));
+               BODY_20_31(29,B,C,D,E,T,A,X(13),X(13),X(15),X( 5),X(10));
+               BODY_20_31(30,A,B,C,D,E,T,X(14),X(14),X( 0),X( 6),X(11));
+               BODY_20_31(31,T,A,B,C,D,E,X(15),X(15),X( 1),X( 7),X(12));
+               }
 
        BODY_32_39(32,E,T,A,B,C,D,X( 0),X( 2),X( 8),X(13));
        BODY_32_39(33,D,E,T,A,B,C,X( 1),X( 3),X( 9),X(14));
@@ -482,7 +381,7 @@ void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
        D=c->h3;
        E=c->h4;
 
-               }
+                       }
        }
 #endif
 
@@ -517,52 +416,6 @@ void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
        E=D, D=C, C=ROTATE(B,30), B=A;  \
        A=ROTATE(A,5)+T+xa;         } while(0)
 
-#ifndef DONT_IMPLEMENT_BLOCK_HOST_ORDER
-void HASH_BLOCK_HOST_ORDER (SHA_CTX *c, const void *d, size_t num)
-       {
-       const SHA_LONG *W=d;
-       register unsigned MD32_REG_T A,B,C,D,E,T;
-       int i;
-       SHA_LONG        X[16];
-
-       A=c->h0;
-       B=c->h1;
-       C=c->h2;
-       D=c->h3;
-       E=c->h4;
-
-       for (;;)
-               {
-       for (i=0;i<16;i++)
-       { X[i]=W[i]; BODY_00_15(X[i]); }
-       for (i=0;i<4;i++)
-       { BODY_16_19(X[i],       X[i+2],      X[i+8],     X[(i+13)&15]); }
-       for (;i<24;i++)
-       { BODY_20_39(X[i&15],    X[(i+2)&15], X[(i+8)&15],X[(i+13)&15]); }
-       for (i=0;i<20;i++)
-       { BODY_40_59(X[(i+8)&15],X[(i+10)&15],X[i&15],    X[(i+5)&15]);  }
-       for (i=4;i<24;i++)
-       { BODY_60_79(X[(i+8)&15],X[(i+10)&15],X[i&15],    X[(i+5)&15]);  }
-       
-       c->h0=(c->h0+A)&0xffffffffL; 
-       c->h1=(c->h1+B)&0xffffffffL;
-       c->h2=(c->h2+C)&0xffffffffL;
-       c->h3=(c->h3+D)&0xffffffffL;
-       c->h4=(c->h4+E)&0xffffffffL;
-
-       if (--num == 0) break;
-
-       A=c->h0;
-       B=c->h1;
-       C=c->h2;
-       D=c->h3;
-       E=c->h4;
-
-       W+=SHA_LBLOCK;
-               }
-       }
-#endif
-
 #ifndef DONT_IMPLEMENT_BLOCK_DATA_ORDER
 void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
        {