Make Makefiles OSF-make-friendly.
[openssl.git] / crypto / sha / sha512.c
1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3  * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
4  * according to the OpenSSL license [found in ../../LICENSE].
5  * ====================================================================
6  */
7 #include <openssl/opensslconf.h>
8 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
9 /*
10  * IMPLEMENTATION NOTES.
11  *
12  * As you might have noticed 32-bit hash algorithms:
13  *
14  * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
15  * - optimized versions implement two transform functions: one operating
16  *   on [aligned] data in host byte order and one - on data in input
17  *   stream byte order;
18  * - share common byte-order neutral collector and padding function
19  *   implementations, ../md32_common.h;
20  *
21  * None of the above applies to this SHA-512 implementation. Reasons
22  * [in reverse order] are:
23  *
24  * - it's the only 64-bit hash algorithm at the moment of this writing,
25  *   there is no need for common collector/padding implementation [yet];
26  * - by supporting only one transform function [which operates on
27  *   *aligned* data in input stream byte order, big-endian in this case]
28  *   we minimize burden of maintenance in two ways: a) collector/padding
29  *   function is simpler; b) only one transform function to stare at;
30  * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
31  *   apply a number of optimizations to mitigate potential performance
32  *   penalties caused by previous design decision;
33  *
34  * Caveat lector.
35  *
36  * Implementation relies on the fact that "long long" is 64-bit on
37  * both 32- and 64-bit platforms. If some compiler vendor comes up
38  * with 128-bit long long, adjustment to sha.h would be required.
39  * As this implementation relies on 64-bit integer type, it's totally
40  * inappropriate for platforms which don't support it, most notably
41  * 16-bit platforms.
42  *                                      <appro@fy.chalmers.se>
43  */
44 #include <stdlib.h>
45 #include <string.h>
46
47 #include <openssl/crypto.h>
48 #include <openssl/sha.h>
49 #include <openssl/opensslv.h>
50
51 #include "cryptlib.h"
52
53 const char SHA512_version[]="SHA-512" OPENSSL_VERSION_PTEXT;
54
55 #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
56     defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
57     defined(__s390__) || defined(__s390x__) || \
58     defined(__aarch64__) || \
59     defined(SHA512_ASM)
60 #define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
61 #endif
62
63 fips_md_init_ctx(SHA384, SHA512)
64         {
65         c->h[0]=U64(0xcbbb9d5dc1059ed8);
66         c->h[1]=U64(0x629a292a367cd507);
67         c->h[2]=U64(0x9159015a3070dd17);
68         c->h[3]=U64(0x152fecd8f70e5939);
69         c->h[4]=U64(0x67332667ffc00b31);
70         c->h[5]=U64(0x8eb44a8768581511);
71         c->h[6]=U64(0xdb0c2e0d64f98fa7);
72         c->h[7]=U64(0x47b5481dbefa4fa4);
73
74         c->Nl=0;        c->Nh=0;
75         c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
76         return 1;
77         }
78
79 fips_md_init(SHA512)
80         {
81         c->h[0]=U64(0x6a09e667f3bcc908);
82         c->h[1]=U64(0xbb67ae8584caa73b);
83         c->h[2]=U64(0x3c6ef372fe94f82b);
84         c->h[3]=U64(0xa54ff53a5f1d36f1);
85         c->h[4]=U64(0x510e527fade682d1);
86         c->h[5]=U64(0x9b05688c2b3e6c1f);
87         c->h[6]=U64(0x1f83d9abfb41bd6b);
88         c->h[7]=U64(0x5be0cd19137e2179);
89
90         c->Nl=0;        c->Nh=0;
91         c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
92         return 1;
93         }
94
95 #ifndef SHA512_ASM
96 static
97 #endif
98 void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
99
100 int SHA512_Final (unsigned char *md, SHA512_CTX *c)
101         {
102         unsigned char *p=(unsigned char *)c->u.p;
103         size_t n=c->num;
104
105         p[n]=0x80;      /* There always is a room for one */
106         n++;
107         if (n > (sizeof(c->u)-16))
108                 memset (p+n,0,sizeof(c->u)-n), n=0,
109                 sha512_block_data_order (c,p,1);
110
111         memset (p+n,0,sizeof(c->u)-16-n);
112 #ifdef  B_ENDIAN
113         c->u.d[SHA_LBLOCK-2] = c->Nh;
114         c->u.d[SHA_LBLOCK-1] = c->Nl;
115 #else
116         p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
117         p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
118         p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
119         p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
120         p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
121         p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
122         p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
123         p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
124         p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
125         p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
126         p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
127         p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
128         p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
129         p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
130         p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
131         p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
132 #endif
133
134         sha512_block_data_order (c,p,1);
135
136         if (md==0) return 0;
137
138         switch (c->md_len)
139                 {
140                 /* Let compiler decide if it's appropriate to unroll... */
141                 case SHA384_DIGEST_LENGTH:
142                         for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
143                                 {
144                                 SHA_LONG64 t = c->h[n];
145
146                                 *(md++) = (unsigned char)(t>>56);
147                                 *(md++) = (unsigned char)(t>>48);
148                                 *(md++) = (unsigned char)(t>>40);
149                                 *(md++) = (unsigned char)(t>>32);
150                                 *(md++) = (unsigned char)(t>>24);
151                                 *(md++) = (unsigned char)(t>>16);
152                                 *(md++) = (unsigned char)(t>>8);
153                                 *(md++) = (unsigned char)(t);
154                                 }
155                         break;
156                 case SHA512_DIGEST_LENGTH:
157                         for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
158                                 {
159                                 SHA_LONG64 t = c->h[n];
160
161                                 *(md++) = (unsigned char)(t>>56);
162                                 *(md++) = (unsigned char)(t>>48);
163                                 *(md++) = (unsigned char)(t>>40);
164                                 *(md++) = (unsigned char)(t>>32);
165                                 *(md++) = (unsigned char)(t>>24);
166                                 *(md++) = (unsigned char)(t>>16);
167                                 *(md++) = (unsigned char)(t>>8);
168                                 *(md++) = (unsigned char)(t);
169                                 }
170                         break;
171                 /* ... as well as make sure md_len is not abused. */
172                 default:        return 0;
173                 }
174
175         return 1;
176         }
177
178 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
179 {   return SHA512_Final (md,c);   }
180
181 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
182         {
183         SHA_LONG64      l;
184         unsigned char  *p=c->u.p;
185         const unsigned char *data=(const unsigned char *)_data;
186
187         if (len==0) return  1;
188
189         l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
190         if (l < c->Nl)          c->Nh++;
191         if (sizeof(len)>=8)     c->Nh+=(((SHA_LONG64)len)>>61);
192         c->Nl=l;
193
194         if (c->num != 0)
195                 {
196                 size_t n = sizeof(c->u) - c->num;
197
198                 if (len < n)
199                         {
200                         memcpy (p+c->num,data,len), c->num += (unsigned int)len;
201                         return 1;
202                         }
203                 else    {
204                         memcpy (p+c->num,data,n), c->num = 0;
205                         len-=n, data+=n;
206                         sha512_block_data_order (c,p,1);
207                         }
208                 }
209
210         if (len >= sizeof(c->u))
211                 {
212 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
213                 if ((size_t)data%sizeof(c->u.d[0]) != 0)
214                         while (len >= sizeof(c->u))
215                                 memcpy (p,data,sizeof(c->u)),
216                                 sha512_block_data_order (c,p,1),
217                                 len  -= sizeof(c->u),
218                                 data += sizeof(c->u);
219                 else
220 #endif
221                         sha512_block_data_order (c,data,len/sizeof(c->u)),
222                         data += len,
223                         len  %= sizeof(c->u),
224                         data -= len;
225                 }
226
227         if (len != 0)   memcpy (p,data,len), c->num = (int)len;
228
229         return 1;
230         }
231
232 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
233 {   return SHA512_Update (c,data,len);   }
234
235 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
236 {   sha512_block_data_order (c,data,1);  }
237
238 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
239         {
240         SHA512_CTX c;
241         static unsigned char m[SHA384_DIGEST_LENGTH];
242
243         if (md == NULL) md=m;
244         SHA384_Init(&c);
245         SHA512_Update(&c,d,n);
246         SHA512_Final(md,&c);
247         OPENSSL_cleanse(&c,sizeof(c));
248         return(md);
249         }
250
251 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
252         {
253         SHA512_CTX c;
254         static unsigned char m[SHA512_DIGEST_LENGTH];
255
256         if (md == NULL) md=m;
257         SHA512_Init(&c);
258         SHA512_Update(&c,d,n);
259         SHA512_Final(md,&c);
260         OPENSSL_cleanse(&c,sizeof(c));
261         return(md);
262         }
263
264 #ifndef SHA512_ASM
265 static const SHA_LONG64 K512[80] = {
266         U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
267         U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
268         U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
269         U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
270         U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
271         U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
272         U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
273         U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
274         U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
275         U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
276         U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
277         U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
278         U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
279         U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
280         U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
281         U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
282         U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
283         U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
284         U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
285         U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
286         U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
287         U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
288         U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
289         U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
290         U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
291         U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
292         U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
293         U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
294         U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
295         U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
296         U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
297         U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
298         U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
299         U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
300         U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
301         U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
302         U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
303         U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
304         U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
305         U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
306
/*
 * Building blocks for the C compression routines:
 *
 *   ROTR(a,n)  - rotate the 64-bit value a right by n bits;
 *   PULL64(x)  - read the 64-bit object x as a big-endian value;
 *   Sigma0/Sigma1/sigma0/sigma1/Ch/Maj - the round functions built on
 *                ROTR, shifts and bitwise operators.
 *
 * Compiler/CPU specific versions of ROTR and PULL64 are selected first
 * (unless PEDANTIC is defined); portable C fallbacks are supplied for
 * whatever is still undefined afterwards.
 *
 * The GNU inline assembly is spelled __asm__ rather than asm so that it
 * is still recognised when the compiler runs in a strict ISO mode
 * (-std=c99, -std=c11, -ansi), where the plain asm keyword is disabled.
 */
#ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#  if defined(__x86_64) || defined(__x86_64__)
#   define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                __asm__ ("rorq %1,%0"   \
                                : "=r"(ret)             \
                                : "J"(n),"0"(a)         \
                                : "cc"); ret;           })
#   if !defined(B_ENDIAN)
#    define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
                                __asm__ ("bswapq    %0"         \
                                : "=r"(ret)                     \
                                : "0"(ret)); ret;               })
#   endif
#  elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#   if defined(I386_ONLY)
#    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                         unsigned int hi=p[0],lo=p[1];          \
                                __asm__("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                    "roll $16,%%eax; roll $16,%%edx; "\
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
                                : "=a"(lo),"=d"(hi)             \
                                : "0"(lo),"1"(hi) : "cc");      \
                                ((SHA_LONG64)hi)<<32|lo;        })
#   else
#    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                         unsigned int hi=p[0],lo=p[1];          \
                                __asm__ ("bswapl %0; bswapl %1;"\
                                : "=r"(lo),"=r"(hi)             \
                                : "0"(lo),"1"(hi));             \
                                ((SHA_LONG64)hi)<<32|lo;        })
#   endif
#  elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
#   define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                __asm__ ("rotrdi %0,%1,%2"      \
                                : "=r"(ret)             \
                                : "r"(a),"K"(n)); ret;  })
#  elif defined(__aarch64__)
#   define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                __asm__ ("ror %0,%1,%2" \
                                : "=r"(ret)             \
                                : "r"(a),"I"(n)); ret;  })
#   if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
#    define PULL64(x)   ({ SHA_LONG64 ret;                      \
                                __asm__ ("rev       %0,%1"      \
                                : "=r"(ret)                     \
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret;             })
#   endif
#  endif
# elif defined(_MSC_VER)
#  if defined(_WIN64)   /* applies to both IA-64 and AMD64 */
#   pragma intrinsic(_rotr64)
#   define ROTR(a,n)    _rotr64((a),n)
#  endif
#  if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#   if defined(I386_ONLY)
    /* Byte-swap both 32-bit halves with xchg/rol only (no bswap). */
    static SHA_LONG64 __fastcall __pull64be(const void *x)
    {   _asm    mov     edx, [ecx + 0]
        _asm    mov     eax, [ecx + 4]
        _asm    xchg    dh,dl
        _asm    xchg    ah,al
        _asm    rol     edx,16
        _asm    rol     eax,16
        _asm    xchg    dh,dl
        _asm    xchg    ah,al
    }
#   else
    /* Byte-swap both 32-bit halves with bswap. */
    static SHA_LONG64 __fastcall __pull64be(const void *x)
    {   _asm    mov     edx, [ecx + 0]
        _asm    mov     eax, [ecx + 4]
        _asm    bswap   edx
        _asm    bswap   eax
    }
#   endif
#   define PULL64(x) __pull64be(&(x))
#   if _MSC_VER<=1200
#    pragma inline_depth(0)
#   endif
#  endif
# endif
#endif

/*
 * Portable fallbacks.  Every macro argument is parenthesised so that an
 * expression argument (e.g. ROTR(x,1+2)) keeps its intended meaning.
 */
#ifndef PULL64
/* B(x,j): byte j of object x, moved to its big-endian position. */
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#endif

#ifndef ROTR
#define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
#endif

#define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
#define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
#define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

#define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
#define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
406
407
408 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
409 /*
410  * This code should give better results on 32-bit CPU with less than
411  * ~24 registers, both size and performance wise...
412  */
413 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
414         {
415         const SHA_LONG64 *W=in;
416         SHA_LONG64      A,E,T;
417         SHA_LONG64      X[9+80],*F;
418         int i;
419
420                         while (num--) {
421
422         F    = X+80;
423         A    = ctx->h[0];       F[1] = ctx->h[1];
424         F[2] = ctx->h[2];       F[3] = ctx->h[3];
425         E    = ctx->h[4];       F[5] = ctx->h[5];
426         F[6] = ctx->h[6];       F[7] = ctx->h[7];
427
428         for (i=0;i<16;i++,F--)
429                 {
430 #ifdef B_ENDIAN
431                 T = W[i];
432 #else
433                 T = PULL64(W[i]);
434 #endif
435                 F[0] = A;
436                 F[4] = E;
437                 F[8] = T;
438                 T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
439                 E    = F[3] + T;
440                 A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
441                 }
442
443         for (;i<80;i++,F--)
444                 {
445                 T    = sigma0(F[8+16-1]);
446                 T   += sigma1(F[8+16-14]);
447                 T   += F[8+16] + F[8+16-9];
448
449                 F[0] = A;
450                 F[4] = E;
451                 F[8] = T;
452                 T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
453                 E    = F[3] + T;
454                 A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
455                 }
456
457         ctx->h[0] += A;         ctx->h[1] += F[1];
458         ctx->h[2] += F[2];      ctx->h[3] += F[3];
459         ctx->h[4] += E;         ctx->h[5] += F[5];
460         ctx->h[6] += F[6];      ctx->h[7] += F[7];
461
462                         W+=SHA_LBLOCK;
463                         }
464         }
465
466 #elif defined(OPENSSL_SMALL_FOOTPRINT)
467
468 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
469         {
470         const SHA_LONG64 *W=in;
471         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1,T2;
472         SHA_LONG64      X[16];
473         int i;
474
475                         while (num--) {
476
477         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
478         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
479
480         for (i=0;i<16;i++)
481                 {
482 #ifdef B_ENDIAN
483                 T1 = X[i] = W[i];
484 #else
485                 T1 = X[i] = PULL64(W[i]);
486 #endif
487                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
488                 T2 = Sigma0(a) + Maj(a,b,c);
489                 h = g;  g = f;  f = e;  e = d + T1;
490                 d = c;  c = b;  b = a;  a = T1 + T2;
491                 }
492
493         for (;i<80;i++)
494                 {
495                 s0 = X[(i+1)&0x0f];     s0 = sigma0(s0);
496                 s1 = X[(i+14)&0x0f];    s1 = sigma1(s1);
497
498                 T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
499                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
500                 T2 = Sigma0(a) + Maj(a,b,c);
501                 h = g;  g = f;  f = e;  e = d + T1;
502                 d = c;  c = b;  b = a;  a = T1 + T2;
503                 }
504
505         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
506         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
507
508                         W+=SHA_LBLOCK;
509                         }
510         }
511
512 #else
513
514 #define ROUND_00_15(i,a,b,c,d,e,f,g,h)          do {    \
515         T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
516         h = Sigma0(a) + Maj(a,b,c);                     \
517         d += T1;        h += T1;                } while (0)
518
519 #define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)      do {    \
520         s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
521         s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
522         T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
523         ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
524
525 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
526         {
527         const SHA_LONG64 *W=in;
528         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1;
529         SHA_LONG64      X[16];
530         int i;
531
532                         while (num--) {
533
534         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
535         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
536
537 #ifdef B_ENDIAN
538         T1 = X[0] = W[0];       ROUND_00_15(0,a,b,c,d,e,f,g,h);
539         T1 = X[1] = W[1];       ROUND_00_15(1,h,a,b,c,d,e,f,g);
540         T1 = X[2] = W[2];       ROUND_00_15(2,g,h,a,b,c,d,e,f);
541         T1 = X[3] = W[3];       ROUND_00_15(3,f,g,h,a,b,c,d,e);
542         T1 = X[4] = W[4];       ROUND_00_15(4,e,f,g,h,a,b,c,d);
543         T1 = X[5] = W[5];       ROUND_00_15(5,d,e,f,g,h,a,b,c);
544         T1 = X[6] = W[6];       ROUND_00_15(6,c,d,e,f,g,h,a,b);
545         T1 = X[7] = W[7];       ROUND_00_15(7,b,c,d,e,f,g,h,a);
546         T1 = X[8] = W[8];       ROUND_00_15(8,a,b,c,d,e,f,g,h);
547         T1 = X[9] = W[9];       ROUND_00_15(9,h,a,b,c,d,e,f,g);
548         T1 = X[10] = W[10];     ROUND_00_15(10,g,h,a,b,c,d,e,f);
549         T1 = X[11] = W[11];     ROUND_00_15(11,f,g,h,a,b,c,d,e);
550         T1 = X[12] = W[12];     ROUND_00_15(12,e,f,g,h,a,b,c,d);
551         T1 = X[13] = W[13];     ROUND_00_15(13,d,e,f,g,h,a,b,c);
552         T1 = X[14] = W[14];     ROUND_00_15(14,c,d,e,f,g,h,a,b);
553         T1 = X[15] = W[15];     ROUND_00_15(15,b,c,d,e,f,g,h,a);
554 #else
555         T1 = X[0]  = PULL64(W[0]);      ROUND_00_15(0,a,b,c,d,e,f,g,h);
556         T1 = X[1]  = PULL64(W[1]);      ROUND_00_15(1,h,a,b,c,d,e,f,g);
557         T1 = X[2]  = PULL64(W[2]);      ROUND_00_15(2,g,h,a,b,c,d,e,f);
558         T1 = X[3]  = PULL64(W[3]);      ROUND_00_15(3,f,g,h,a,b,c,d,e);
559         T1 = X[4]  = PULL64(W[4]);      ROUND_00_15(4,e,f,g,h,a,b,c,d);
560         T1 = X[5]  = PULL64(W[5]);      ROUND_00_15(5,d,e,f,g,h,a,b,c);
561         T1 = X[6]  = PULL64(W[6]);      ROUND_00_15(6,c,d,e,f,g,h,a,b);
562         T1 = X[7]  = PULL64(W[7]);      ROUND_00_15(7,b,c,d,e,f,g,h,a);
563         T1 = X[8]  = PULL64(W[8]);      ROUND_00_15(8,a,b,c,d,e,f,g,h);
564         T1 = X[9]  = PULL64(W[9]);      ROUND_00_15(9,h,a,b,c,d,e,f,g);
565         T1 = X[10] = PULL64(W[10]);     ROUND_00_15(10,g,h,a,b,c,d,e,f);
566         T1 = X[11] = PULL64(W[11]);     ROUND_00_15(11,f,g,h,a,b,c,d,e);
567         T1 = X[12] = PULL64(W[12]);     ROUND_00_15(12,e,f,g,h,a,b,c,d);
568         T1 = X[13] = PULL64(W[13]);     ROUND_00_15(13,d,e,f,g,h,a,b,c);
569         T1 = X[14] = PULL64(W[14]);     ROUND_00_15(14,c,d,e,f,g,h,a,b);
570         T1 = X[15] = PULL64(W[15]);     ROUND_00_15(15,b,c,d,e,f,g,h,a);
571 #endif
572
573         for (i=16;i<80;i+=16)
574                 {
575                 ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X);
576                 ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X);
577                 ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X);
578                 ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X);
579                 ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X);
580                 ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X);
581                 ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X);
582                 ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X);
583                 ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X);
584                 ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X);
585                 ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X);
586                 ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X);
587                 ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X);
588                 ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X);
589                 ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X);
590                 ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X);
591                 }
592
593         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
594         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
595
596                         W+=SHA_LBLOCK;
597                         }
598         }
599
600 #endif
601
602 #endif /* SHA512_ASM */
603
604 #else /* !OPENSSL_NO_SHA512 */
605
/*
 * SHA-512 is compiled out: on the configurations listed below, define a
 * self-referencing placeholder object (presumably so that the
 * translation unit is not empty).
 */
#if defined(OPENSSL_SYS_MACOSX) || defined(__DECC) || defined(PEDANTIC)
static void *dummy = &dummy;
#endif
609
610 #endif /* !OPENSSL_NO_SHA512 */