/* crypto/md32_common.h */
/* ====================================================================
 * Copyright (c) 1999-2006 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    licensing@OpenSSL.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 *
 */

/*
 * This is a generic 32 bit "collector" for message digest algorithms.
 * Whenever needed it collects the input character stream into chunks of
 * 32 bit values and invokes a block function that performs the actual
 * hash calculations.
 *
 * Porting guide.
 *
 * Obligatory macros:
 *
 * DATA_ORDER_IS_BIG_ENDIAN or DATA_ORDER_IS_LITTLE_ENDIAN
 *	this macro defines byte order of input stream.
 * HASH_CBLOCK
 *	size of a unit chunk HASH_BLOCK operates on.
 * HASH_LONG
 *	has to be at least 32 bit wide.
 * HASH_CTX
 *	context structure that at least contains the following
 *	members:
 *		typedef struct {
 *			...
 *			HASH_LONG	Nl,Nh;
 *			either {
 *			HASH_LONG	data[HASH_LBLOCK];
 *			unsigned char	data[HASH_CBLOCK];
 *			};
 *			unsigned int	num;
 *			...
 *			} HASH_CTX;
 *	data[] vector is expected to be zeroed upon first call to
 *	HASH_UPDATE.
 * HASH_UPDATE
 *	name of "Update" function, implemented here.
 * HASH_TRANSFORM
 *	name of "Transform" function, implemented here.
 * HASH_FINAL
 *	name of "Final" function, implemented here.
 * HASH_BLOCK_DATA_ORDER
 *	name of "block" function capable of treating *unaligned* input
 *	message in original (data) byte order, implemented externally.
 * HASH_MAKE_STRING
 *	macro converting context variables to an ASCII hash string.
 *
 * MD5 example:
 *
 *	#define DATA_ORDER_IS_LITTLE_ENDIAN
 *
 *	#define HASH_LONG		MD5_LONG
 *	#define HASH_CTX		MD5_CTX
 *	#define HASH_CBLOCK		MD5_CBLOCK
 *	#define HASH_UPDATE		MD5_Update
 *	#define HASH_TRANSFORM		MD5_Transform
 *	#define HASH_FINAL		MD5_Final
 *	#define HASH_BLOCK_DATA_ORDER	md5_block_data_order
 *
 *					<appro@fy.chalmers.se>
 */
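
/*
 * HASH_MAKE_STRING is only named above, so here is a minimal sketch of
 * what an instantiation typically looks like. It assumes a context with
 * four state words A..D as in MD5; the member names are an assumption
 * of this example, not a requirement of this header:
 *
 *	#define HASH_MAKE_STRING(c,s)	do {		\
 *		unsigned long ll;			\
 *		ll=(c)->A; (void)HOST_l2c(ll,(s));	\
 *		ll=(c)->B; (void)HOST_l2c(ll,(s));	\
 *		ll=(c)->C; (void)HOST_l2c(ll,(s));	\
 *		ll=(c)->D; (void)HOST_l2c(ll,(s));	\
 *		} while (0)
 */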

#if !defined(DATA_ORDER_IS_BIG_ENDIAN) && !defined(DATA_ORDER_IS_LITTLE_ENDIAN)
#error "DATA_ORDER must be defined!"
#endif

#ifndef HASH_CBLOCK
#error "HASH_CBLOCK must be defined!"
#endif
#ifndef HASH_LONG
#error "HASH_LONG must be defined!"
#endif
#ifndef HASH_CTX
#error "HASH_CTX must be defined!"
#endif

#ifndef HASH_UPDATE
#error "HASH_UPDATE must be defined!"
#endif
#ifndef HASH_TRANSFORM
#error "HASH_TRANSFORM must be defined!"
#endif
#ifndef HASH_FINAL
#error "HASH_FINAL must be defined!"
#endif

#ifndef HASH_BLOCK_DATA_ORDER
#error "HASH_BLOCK_DATA_ORDER must be defined!"
#endif

/*
 * Engage compiler specific rotate intrinsic function if available.
 */
#undef ROTATE
#ifndef PEDANTIC
# if defined(_MSC_VER) || defined(__ICC)
#  define ROTATE(a,n)	_lrotl(a,n)
# elif defined(__MWERKS__)
#  if defined(__POWERPC__)
#   define ROTATE(a,n)	__rlwinm(a,n,0,31)
#  elif defined(__MC68K__)
    /* Motorola specific tweak. <appro@fy.chalmers.se> */
#   define ROTATE(a,n)	( n<24 ? __rol(a,n) : __ror(a,32-n) )
#  else
#   define ROTATE(a,n)	__rol(a,n)
#  endif
# elif defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
  /*
   * Some GNU C inline assembler templates. Note that these are
   * rotates by *constant* number of bits! But that's exactly
   * what we need here...
   *					<appro@fy.chalmers.se>
   */
#  if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
#   define ROTATE(a,n)	({ register unsigned int ret;	\
				asm (			\
				"roll %1,%0"		\
				: "=r"(ret)		\
				: "I"(n), "0"((unsigned int)(a))	\
				: "cc");		\
			   ret;				\
			})
#  elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
	defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__)
#   define ROTATE(a,n)	({ register unsigned int ret;	\
				asm (			\
				"rlwinm %0,%1,%2,0,31"	\
				: "=r"(ret)		\
				: "r"(a), "I"(n));	\
			   ret;				\
			})
#  elif defined(__s390x__)
#   define ROTATE(a,n) ({ register unsigned int ret;	\
				asm ("rll %0,%1,%2"	\
				: "=r"(ret)		\
				: "r"(a), "I"(n));	\
			  ret;				\
			})
#  endif
# endif
#endif /* PEDANTIC */

#ifndef ROTATE
#define ROTATE(a,n)	(((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
#endif
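
/*
 * Worked example of the generic fallback: ROTATE(0x12345678,8) is
 * (0x12345678<<8)|(0x12345678>>24) == 0x34567812. The (a)&0xffffffff
 * mask keeps the right shift well defined when HASH_LONG is wider
 * than 32 bits.
 */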

#if defined(DATA_ORDER_IS_BIG_ENDIAN)

#ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#  if ((defined(__i386) || defined(__i386__)) && !defined(I386_ONLY)) || \
      (defined(__x86_64) || defined(__x86_64__))
#   if !defined(B_ENDIAN)
    /*
     * This gives ~30-40% performance improvement in SHA-256 compiled
     * with gcc [on P4]. Well, first macro to be frank. We can pull
     * this trick on x86* platforms only, because these CPUs can fetch
     * unaligned data without raising an exception.
     */
#   define HOST_c2l(c,l)	({ unsigned int r=*((const unsigned int *)(c));	\
				   asm ("bswapl %0":"=r"(r):"0"(r));	\
				   (c)+=4; (l)=r;			})
#   define HOST_l2c(l,c)	({ unsigned int r=(l);			\
				   asm ("bswapl %0":"=r"(r):"0"(r));	\
				   *((unsigned int *)(c))=r; (c)+=4; r;	})
#   endif
#  endif
# endif
#endif
#if defined(__s390__) || defined(__s390x__)
# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l))
# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l))
#endif

#ifndef HOST_c2l
#define HOST_c2l(c,l)	(l =(((unsigned long)(*((c)++)))<<24),		\
			 l|=(((unsigned long)(*((c)++)))<<16),		\
			 l|=(((unsigned long)(*((c)++)))<< 8),		\
			 l|=(((unsigned long)(*((c)++)))    ),		\
			 l)
#endif
#ifndef HOST_l2c
#define HOST_l2c(l,c)	(*((c)++)=(unsigned char)(((l)>>24)&0xff),	\
			 *((c)++)=(unsigned char)(((l)>>16)&0xff),	\
			 *((c)++)=(unsigned char)(((l)>> 8)&0xff),	\
			 *((c)++)=(unsigned char)(((l)    )&0xff),	\
			 l)
#endif
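
/*
 * Worked example: with c pointing at the bytes {0x01,0x02,0x03,0x04},
 * the big-endian HOST_c2l above yields l==0x01020304 and advances c by
 * four; HOST_l2c performs the exact inverse.
 */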

#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)

#ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#  if defined(__s390x__)
#   define HOST_c2l(c,l)	({ asm ("lrv %0,0(%1)"			\
				   :"=r"(l) : "r"(c));			\
				   (c)+=4; (l);				})
#   define HOST_l2c(l,c)	({ asm ("strv %0,0(%1)"			\
				   : : "r"(l),"r"(c) : "memory");	\
				   (c)+=4; (l);				})
#  endif
# endif
#endif
#if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
# ifndef B_ENDIAN
   /* See comment in DATA_ORDER_IS_BIG_ENDIAN section. */
#  define HOST_c2l(c,l)	((l)=*((const unsigned int *)(c)), (c)+=4, l)
#  define HOST_l2c(l,c)	(*((unsigned int *)(c))=(l), (c)+=4, l)
# endif
#endif

#ifndef HOST_c2l
#define HOST_c2l(c,l)	(l =(((unsigned long)(*((c)++)))    ),		\
			 l|=(((unsigned long)(*((c)++)))<< 8),		\
			 l|=(((unsigned long)(*((c)++)))<<16),		\
			 l|=(((unsigned long)(*((c)++)))<<24),		\
			 l)
#endif
#ifndef HOST_l2c
#define HOST_l2c(l,c)	(*((c)++)=(unsigned char)(((l)    )&0xff),	\
			 *((c)++)=(unsigned char)(((l)>> 8)&0xff),	\
			 *((c)++)=(unsigned char)(((l)>>16)&0xff),	\
			 *((c)++)=(unsigned char)(((l)>>24)&0xff),	\
			 l)
#endif

#endif
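
/*
 * Worked example: the same bytes {0x01,0x02,0x03,0x04} read through the
 * little-endian HOST_c2l yield l==0x04030201, matching how MD5 and the
 * other little-endian digests interpret their input words.
 */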

/*
 * Time for some action:-)
 */

int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
	{
	const unsigned char *data=data_;
	unsigned char *p;
	HASH_LONG l;
	size_t n;

	if (len==0) return 1;

	l=(c->Nl+(((HASH_LONG)len)<<3))&0xffffffffUL;
	/* 95-05-24 eay Fixed a bug with the overflow handling, thanks to
	 * Wei Dai <weidai@eskimo.com> for pointing it out. */
	if (l < c->Nl) /* overflow */
		c->Nh++;
	c->Nh+=(len>>29);	/* might cause compiler warning on 16-bit */
	c->Nl=l;
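
	/*
	 * Example: a 100-byte update adds 800 bits; Nl gains 800 modulo
	 * 2^32 and Nh picks up both the carry and any bits of len that
	 * the <<3 shift would lose (len>>29).
	 */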

	n = c->num;
	if (n != 0)
		{
		p=(unsigned char *)c->data;

		if ((n+len) >= HASH_CBLOCK)
			{
			/* fill the buffered partial block and hash it */
			memcpy (p+n,data,HASH_CBLOCK-n);
			HASH_BLOCK_DATA_ORDER (c,p,1);
			n      = HASH_CBLOCK-n;
			data  += n;
			len   -= n;
			c->num = 0;
			memset (p,0,HASH_CBLOCK);	/* keep it zeroed */
			}
		else
			{
			memcpy (p+n,data,len);
			c->num += (unsigned int)len;
			return 1;
			}
		}

	/* hash as many whole blocks as possible directly from the input */
	n = len/HASH_CBLOCK;
	if (n > 0)
		{
		HASH_BLOCK_DATA_ORDER (c,data,n);
		n    *= HASH_CBLOCK;
		data += n;
		len  -= n;
		}

	/* buffer the remaining tail for the next call */
	if (len != 0)
		{
		p = (unsigned char *)c->data;
		c->num = (unsigned int)len;
		memcpy (p,data,len);
		}
	return 1;
	}
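
/*
 * Usage sketch (assuming the MD5 instantiation from the header comment;
 * illustrative only): data may arrive in arbitrary-size pieces, with
 * partial blocks buffered in c->data between calls:
 *
 *	unsigned char md[MD5_DIGEST_LENGTH];
 *	MD5_CTX ctx;
 *	MD5_Init(&ctx);
 *	MD5_Update(&ctx,"hello ",6);
 *	MD5_Update(&ctx,"world",5);
 *	MD5_Final(md,&ctx);
 */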

void HASH_TRANSFORM (HASH_CTX *c, const unsigned char *data)
	{
	HASH_BLOCK_DATA_ORDER (c,data,1);
	}

int HASH_FINAL (unsigned char *md, HASH_CTX *c)
	{
	unsigned char *p = (unsigned char *)c->data;
	size_t n = c->num;

	p[n] = 0x80; /* there is always room for one */
	n++;

	if (n > (HASH_CBLOCK-8))
		{
		/* no room left for the 64-bit length; bytes n..HASH_CBLOCK-1
		 * are already zero ("keep it zeroed"), so hash this block
		 * and start a fresh, zeroed one */
		HASH_BLOCK_DATA_ORDER (c,p,1);
		memset (p,0,HASH_CBLOCK);
		}
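
	/*
	 * Example of the resulting MD-style padding with HASH_CBLOCK==64:
	 * a 3-byte message becomes 3 data bytes, one 0x80 byte, 52 zero
	 * bytes and an 8-byte bit count of 24.
	 */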

	p += HASH_CBLOCK-8;
#if   defined(DATA_ORDER_IS_BIG_ENDIAN)
	(void)HOST_l2c(c->Nh,p);
	(void)HOST_l2c(c->Nl,p);
#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
	(void)HOST_l2c(c->Nl,p);
	(void)HOST_l2c(c->Nh,p);
#endif
	p -= HASH_CBLOCK;
	HASH_BLOCK_DATA_ORDER (c,p,1);
	c->num=0;
	memset (p,0,HASH_CBLOCK);

#ifndef HASH_MAKE_STRING
#error "HASH_MAKE_STRING must be defined!"
#else
	HASH_MAKE_STRING(c,md);
#endif

	return 1;
	}

#ifndef MD32_REG_T
#define MD32_REG_T long
/*
 * This comment was originally written for MD5, which is why it
 * discusses A-D. But it basically applies to all 32-bit digests,
 * which is why it was moved to this common header file.
 *
 * In case you wonder why A-D are declared as long and not
 * as MD5_LONG: doing so results in a slight performance
 * boost on LP64 architectures. The catch is we don't
 * really care if 32 MSBs of a 64-bit register get polluted
 * with eventual overflows as we *save* only 32 LSBs in
 * *either* case. Declaring them long excuses the compiler
 * from keeping 32 MSBs zeroed, resulting in 13% performance
 * improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
 * Well, to be honest, it should say that this *prevents*
 * performance degradation.
 *				<appro@fy.chalmers.se>
 *
 * Apparently there are LP64 compilers that generate better
 * code if A-D are declared int. Most notably GCC-x86_64
 * generates better code.
 *				<appro@fy.chalmers.se>
 */
#endif
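
/*
 * Illustration (not part of this header): a block function typically
 * loads the state into MD32_REG_T locals before running the rounds,
 * e.g. for an MD5-style context with state words A..D (the member
 * names here are an assumption of this example):
 *
 *	register MD32_REG_T A,B,C,D;
 *	A=ctx->A; B=ctx->B; C=ctx->C; D=ctx->D;
 */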