ARMv8 assembly pack: add Cortex performance numbers.
[openssl.git] / crypto / sha / sha512.c
1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3  * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
4  * according to the OpenSSL license [found in ../../LICENSE].
5  * ====================================================================
6  */
7 #include <openssl/opensslconf.h>
8 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
9 /*
10  * IMPLEMENTATION NOTES.
11  *
12  * As you might have noticed 32-bit hash algorithms:
13  *
14  * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
15  * - optimized versions implement two transform functions: one operating
16  *   on [aligned] data in host byte order and one - on data in input
17  *   stream byte order;
18  * - share common byte-order neutral collector and padding function
19  *   implementations, ../md32_common.h;
20  *
21  * None of the above applies to this SHA-512 implementation. Reasons
22  * [in reverse order] are:
23  *
24  * - it's the only 64-bit hash algorithm at the time of this writing,
25  *   there is no need for common collector/padding implementation [yet];
26  * - by supporting only one transform function [which operates on
27  *   *aligned* data in input stream byte order, big-endian in this case]
28  *   we minimize burden of maintenance in two ways: a) collector/padding
29  *   function is simpler; b) only one transform function to stare at;
30  * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
31  *   apply a number of optimizations to mitigate potential performance
32  *   penalties caused by previous design decision;
33  *
34  * Caveat lector.
35  *
36  * Implementation relies on the fact that "long long" is 64-bit on
37  * both 32- and 64-bit platforms. If some compiler vendor comes up
38  * with 128-bit long long, adjustment to sha.h would be required.
39  * As this implementation relies on 64-bit integer type, it's totally
40  * inappropriate for platforms which don't support it, most notably
41  * 16-bit platforms.
42  *                                      <appro@fy.chalmers.se>
43  */
44 #include <stdlib.h>
45 #include <string.h>
46
47 #include <openssl/crypto.h>
48 #include <openssl/sha.h>
49 #include <openssl/opensslv.h>
50
51 #include "cryptlib.h"
52
/*
 * Version identification string: the literal "SHA-512" concatenated at
 * compile time with OPENSSL_VERSION_PTEXT.
 * NOTE(review): __fips_constseg is defined outside this file; presumably it
 * selects the data segment used for constants in FIPS builds -- confirm.
 */
53 __fips_constseg
54 const char SHA512_version[]="SHA-512" OPENSSL_VERSION_PTEXT;
55
/*
 * SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA is defined for x86, x86-64, s390(x)
 * and AArch64 compilers, and whenever SHA512_ASM is defined.  When it is set,
 * SHA512_Update and SHA512_Transform hand caller data straight to
 * sha512_block_data_order; otherwise they first copy input that is not
 * aligned to sizeof(c->u.d[0]) into the context buffer.
 */
56 #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
57     defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
58     defined(__s390__) || defined(__s390x__) || \
59     defined(__aarch64__) || \
60     defined(SHA512_ASM)
61 #define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
62 #endif
63
64 int SHA384_Init (SHA512_CTX *c)
65         {
66         c->h[0]=U64(0xcbbb9d5dc1059ed8);
67         c->h[1]=U64(0x629a292a367cd507);
68         c->h[2]=U64(0x9159015a3070dd17);
69         c->h[3]=U64(0x152fecd8f70e5939);
70         c->h[4]=U64(0x67332667ffc00b31);
71         c->h[5]=U64(0x8eb44a8768581511);
72         c->h[6]=U64(0xdb0c2e0d64f98fa7);
73         c->h[7]=U64(0x47b5481dbefa4fa4);
74
75         c->Nl=0;        c->Nh=0;
76         c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
77         return 1;
78         }
79
80 int SHA512_Init (SHA512_CTX *c)
81         {
82         c->h[0]=U64(0x6a09e667f3bcc908);
83         c->h[1]=U64(0xbb67ae8584caa73b);
84         c->h[2]=U64(0x3c6ef372fe94f82b);
85         c->h[3]=U64(0xa54ff53a5f1d36f1);
86         c->h[4]=U64(0x510e527fade682d1);
87         c->h[5]=U64(0x9b05688c2b3e6c1f);
88         c->h[6]=U64(0x1f83d9abfb41bd6b);
89         c->h[7]=U64(0x5be0cd19137e2179);
90
91         c->Nl=0;        c->Nh=0;
92         c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
93         return 1;
94         }
95
/*
 * Prototype of the block transform: processes num consecutive input blocks
 * starting at in and folds them into ctx->h.
 * Without SHA512_ASM the function is static and one of the C versions later
 * in this file is used.  With SHA512_ASM the declaration has external
 * linkage -- presumably the definition then comes from a separate assembly
 * module (not visible here).
 */
96 #ifndef SHA512_ASM
97 static
98 #endif
99 void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
100
101 int SHA512_Final (unsigned char *md, SHA512_CTX *c)
102         {
103         unsigned char *p=(unsigned char *)c->u.p;
104         size_t n=c->num;
105
106         p[n]=0x80;      /* There always is a room for one */
107         n++;
108         if (n > (sizeof(c->u)-16))
109                 memset (p+n,0,sizeof(c->u)-n), n=0,
110                 sha512_block_data_order (c,p,1);
111
112         memset (p+n,0,sizeof(c->u)-16-n);
113 #ifdef  B_ENDIAN
114         c->u.d[SHA_LBLOCK-2] = c->Nh;
115         c->u.d[SHA_LBLOCK-1] = c->Nl;
116 #else
117         p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
118         p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
119         p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
120         p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
121         p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
122         p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
123         p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
124         p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
125         p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
126         p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
127         p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
128         p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
129         p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
130         p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
131         p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
132         p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
133 #endif
134
135         sha512_block_data_order (c,p,1);
136
137         if (md==0) return 0;
138
139         switch (c->md_len)
140                 {
141                 /* Let compiler decide if it's appropriate to unroll... */
142                 case SHA384_DIGEST_LENGTH:
143                         for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
144                                 {
145                                 SHA_LONG64 t = c->h[n];
146
147                                 *(md++) = (unsigned char)(t>>56);
148                                 *(md++) = (unsigned char)(t>>48);
149                                 *(md++) = (unsigned char)(t>>40);
150                                 *(md++) = (unsigned char)(t>>32);
151                                 *(md++) = (unsigned char)(t>>24);
152                                 *(md++) = (unsigned char)(t>>16);
153                                 *(md++) = (unsigned char)(t>>8);
154                                 *(md++) = (unsigned char)(t);
155                                 }
156                         break;
157                 case SHA512_DIGEST_LENGTH:
158                         for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
159                                 {
160                                 SHA_LONG64 t = c->h[n];
161
162                                 *(md++) = (unsigned char)(t>>56);
163                                 *(md++) = (unsigned char)(t>>48);
164                                 *(md++) = (unsigned char)(t>>40);
165                                 *(md++) = (unsigned char)(t>>32);
166                                 *(md++) = (unsigned char)(t>>24);
167                                 *(md++) = (unsigned char)(t>>16);
168                                 *(md++) = (unsigned char)(t>>8);
169                                 *(md++) = (unsigned char)(t);
170                                 }
171                         break;
172                 /* ... as well as make sure md_len is not abused. */
173                 default:        return 0;
174                 }
175
176         return 1;
177         }
178
179 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
180 {   return SHA512_Final (md,c);   }
181
182 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
183         {
184         SHA_LONG64      l;
185         unsigned char  *p=c->u.p;
186         const unsigned char *data=(const unsigned char *)_data;
187
188         if (len==0) return  1;
189
190         l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
191         if (l < c->Nl)          c->Nh++;
192         if (sizeof(len)>=8)     c->Nh+=(((SHA_LONG64)len)>>61);
193         c->Nl=l;
194
195         if (c->num != 0)
196                 {
197                 size_t n = sizeof(c->u) - c->num;
198
199                 if (len < n)
200                         {
201                         memcpy (p+c->num,data,len), c->num += (unsigned int)len;
202                         return 1;
203                         }
204                 else    {
205                         memcpy (p+c->num,data,n), c->num = 0;
206                         len-=n, data+=n;
207                         sha512_block_data_order (c,p,1);
208                         }
209                 }
210
211         if (len >= sizeof(c->u))
212                 {
213 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
214                 if ((size_t)data%sizeof(c->u.d[0]) != 0)
215                         while (len >= sizeof(c->u))
216                                 memcpy (p,data,sizeof(c->u)),
217                                 sha512_block_data_order (c,p,1),
218                                 len  -= sizeof(c->u),
219                                 data += sizeof(c->u);
220                 else
221 #endif
222                         sha512_block_data_order (c,data,len/sizeof(c->u)),
223                         data += len,
224                         len  %= sizeof(c->u),
225                         data -= len;
226                 }
227
228         if (len != 0)   memcpy (p,data,len), c->num = (int)len;
229
230         return 1;
231         }
232
233 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
234 {   return SHA512_Update (c,data,len);   }
235
236 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
237         {
238 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
239         if ((size_t)data%sizeof(c->u.d[0]) != 0)
240                 memcpy(c->u.p,data,sizeof(c->u.p)),
241                 data = c->u.p;
242 #endif
243         sha512_block_data_order (c,data,1);
244         }
245
246 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
247         {
248         SHA512_CTX c;
249         static unsigned char m[SHA384_DIGEST_LENGTH];
250
251         if (md == NULL) md=m;
252         SHA384_Init(&c);
253         SHA512_Update(&c,d,n);
254         SHA512_Final(md,&c);
255         OPENSSL_cleanse(&c,sizeof(c));
256         return(md);
257         }
258
259 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
260         {
261         SHA512_CTX c;
262         static unsigned char m[SHA512_DIGEST_LENGTH];
263
264         if (md == NULL) md=m;
265         SHA512_Init(&c);
266         SHA512_Update(&c,d,n);
267         SHA512_Final(md,&c);
268         OPENSSL_cleanse(&c,sizeof(c));
269         return(md);
270         }
271
272 #ifndef SHA512_ASM
/*
 * K512: table of 80 64-bit round constants.  The C versions of
 * sha512_block_data_order in this file add K512[i] to the working value in
 * round i (i = 0..79).  The table is compiled only when SHA512_ASM is not
 * defined.
 */
273 __fips_constseg
274 static const SHA_LONG64 K512[80] = {
275         U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
276         U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
277         U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
278         U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
279         U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
280         U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
281         U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
282         U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
283         U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
284         U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
285         U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
286         U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
287         U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
288         U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
289         U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
290         U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
291         U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
292         U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
293         U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
294         U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
295         U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
296         U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
297         U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
298         U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
299         U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
300         U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
301         U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
302         U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
303         U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
304         U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
305         U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
306         U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
307         U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
308         U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
309         U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
310         U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
311         U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
312         U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
313         U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
314         U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
315
/*
 * Optional compiler/CPU specific definitions of
 *   ROTR(a,n)  - rotate the 64-bit value a right by the constant n, and
 *   PULL64(x)  - fetch the 64-bit big-endian word stored at x as a host
 *                integer.
 * The whole section is skipped when PEDANTIC is defined.  Any macro left
 * undefined here is supplied by the portable fallbacks that follow.
 */
316 #ifndef PEDANTIC
/* GCC-compatible compilers with inline assembly allowed. */
317 # if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
318 #  if defined(__x86_64) || defined(__x86_64__)
/* x86-64: rotate with rorq ("J" constrains n to an immediate). */
319 #   define ROTR(a,n)    ({ SHA_LONG64 ret;              \
320                                 asm ("rorq %1,%0"       \
321                                 : "=r"(ret)             \
322                                 : "J"(n),"0"(a)         \
323                                 : "cc"); ret;           })
324 #   if !defined(B_ENDIAN)
/* Load 64 bits through a pointer cast and reverse the bytes with bswapq. */
325 #    define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
326                                 asm ("bswapq    %0"             \
327                                 : "=r"(ret)                     \
328                                 : "0"(ret)); ret;               })
329 #   endif
330 #  elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
331 #   if defined(I386_ONLY)
/*
 * 32-bit x86, I386_ONLY: read the word as two 32-bit halves (p[0] is the
 * high half) and byte-reverse each with an xchgb/roll/xchgb sequence.
 * NOTE(review): presumably chosen because bswap is not available on the
 * original 80386 -- confirm.
 */
332 #    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
333                          unsigned int hi=p[0],lo=p[1];          \
334                                 asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
335                                     "roll $16,%%eax; roll $16,%%edx; "\
336                                     "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
337                                 : "=a"(lo),"=d"(hi)             \
338                                 : "0"(lo),"1"(hi) : "cc");      \
339                                 ((SHA_LONG64)hi)<<32|lo;        })
340 #   else
/* 32-bit x86 otherwise: same two-halves load, each half reversed with bswapl. */
341 #    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
342                          unsigned int hi=p[0],lo=p[1];          \
343                                 asm ("bswapl %0; bswapl %1;"    \
344                                 : "=r"(lo),"=r"(hi)             \
345                                 : "0"(lo),"1"(hi));             \
346                                 ((SHA_LONG64)hi)<<32|lo;        })
347 #   endif
348 #  elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
/* 64-bit PowerPC: rotate with the rotrdi mnemonic; PULL64 is left to the fallback. */
349 #   define ROTR(a,n)    ({ SHA_LONG64 ret;              \
350                                 asm ("rotrdi %0,%1,%2"  \
351                                 : "=r"(ret)             \
352                                 : "r"(a),"K"(n)); ret;  })
353 #  elif defined(__aarch64__)
/* AArch64: rotate with ror. */
354 #   define ROTR(a,n)    ({ SHA_LONG64 ret;              \
355                                 asm ("ror %0,%1,%2"     \
356                                 : "=r"(ret)             \
357                                 : "r"(a),"I"(n)); ret;  })
358 #   if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
359         __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
/* Little-endian AArch64 only: load 64 bits and byte-reverse with rev. */
360 #    define PULL64(x)   ({ SHA_LONG64 ret;                      \
361                                 asm ("rev       %0,%1"          \
362                                 : "=r"(ret)                     \
363                                 : "r"(*((const SHA_LONG64 *)(&(x))))); ret;             })
364 #   endif
365 #  endif
/* Microsoft compilers. */
366 # elif defined(_MSC_VER)
367 #  if defined(_WIN64)   /* applies to both IA-64 and AMD64 */
/* Use the _rotr64 compiler intrinsic for the rotate. */
368 #   pragma intrinsic(_rotr64)
369 #   define ROTR(a,n)    _rotr64((a),n)
370 #  endif
371 #  if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
/*
 * 32-bit x86 with MSVC inline assembly: __pull64be loads the two 32-bit
 * halves at [ecx] and [ecx+4] into edx and eax and byte-reverses each.
 * The functions contain no return statement.
 * NOTE(review): presumably relies on __fastcall passing x in ecx and on a
 * 64-bit result being returned in edx:eax -- confirm against the MSVC ABI.
 */
372 #   if defined(I386_ONLY)
373     static SHA_LONG64 __fastcall __pull64be(const void *x)
374     {   _asm    mov     edx, [ecx + 0]
375         _asm    mov     eax, [ecx + 4]
376         _asm    xchg    dh,dl
377         _asm    xchg    ah,al
378         _asm    rol     edx,16
379         _asm    rol     eax,16
380         _asm    xchg    dh,dl
381         _asm    xchg    ah,al
382     }
383 #   else
384     static SHA_LONG64 __fastcall __pull64be(const void *x)
385     {   _asm    mov     edx, [ecx + 0]
386         _asm    mov     eax, [ecx + 4]
387         _asm    bswap   edx
388         _asm    bswap   eax
389     }
390 #   endif
391 #   define PULL64(x) __pull64be(&(x))
/* NOTE(review): the reason for disabling inlining on _MSC_VER<=1200 is not stated here. */
392 #   if _MSC_VER<=1200
393 #    pragma inline_depth(0)
394 #   endif
395 #  endif
396 # endif
397 #endif
398
/*
 * Portable fallbacks and round helper macros.
 *
 * Every macro argument is fully parenthesized so that compound arguments
 * (for example a shift count written as k+1) expand with the intended
 * precedence.  Arguments may be evaluated more than once, so they must not
 * have side effects.
 */
#ifndef PULL64
/* Byte j of the object x (j = 0 is the most significant byte), moved into position. */
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
/* Assemble the eight bytes starting at x into one big-endian 64-bit value. */
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#endif

#ifndef ROTR
/* Rotate the 64-bit value x right by s bits; s must be in 1..63. */
#define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
#endif

/* Rotate/shift combinations applied to the working variables ... */
#define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
/* ... and to the message schedule words. */
#define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
#define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

/* Ch: for each bit, pick y where x is 1 and z where x is 0. */
#define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
/* Maj: for each bit, the majority value among x, y and z. */
#define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
415
416
417 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
418 /*
419  * This code should give better results on 32-bit CPU with less than
420  * ~24 registers, both size and performance wise...
421  */
422 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
423         {
424         const SHA_LONG64 *W=in;
425         SHA_LONG64      A,E,T;
426         SHA_LONG64      X[9+80],*F;
427         int i;
428
429                         while (num--) {
430
431         F    = X+80;
432         A    = ctx->h[0];       F[1] = ctx->h[1];
433         F[2] = ctx->h[2];       F[3] = ctx->h[3];
434         E    = ctx->h[4];       F[5] = ctx->h[5];
435         F[6] = ctx->h[6];       F[7] = ctx->h[7];
436
437         for (i=0;i<16;i++,F--)
438                 {
439 #ifdef B_ENDIAN
440                 T = W[i];
441 #else
442                 T = PULL64(W[i]);
443 #endif
444                 F[0] = A;
445                 F[4] = E;
446                 F[8] = T;
447                 T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
448                 E    = F[3] + T;
449                 A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
450                 }
451
452         for (;i<80;i++,F--)
453                 {
454                 T    = sigma0(F[8+16-1]);
455                 T   += sigma1(F[8+16-14]);
456                 T   += F[8+16] + F[8+16-9];
457
458                 F[0] = A;
459                 F[4] = E;
460                 F[8] = T;
461                 T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
462                 E    = F[3] + T;
463                 A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
464                 }
465
466         ctx->h[0] += A;         ctx->h[1] += F[1];
467         ctx->h[2] += F[2];      ctx->h[3] += F[3];
468         ctx->h[4] += E;         ctx->h[5] += F[5];
469         ctx->h[6] += F[6];      ctx->h[7] += F[7];
470
471                         W+=SHA_LBLOCK;
472                         }
473         }
474
475 #elif defined(OPENSSL_SMALL_FOOTPRINT)
476
477 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
478         {
479         const SHA_LONG64 *W=in;
480         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1,T2;
481         SHA_LONG64      X[16];
482         int i;
483
484                         while (num--) {
485
486         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
487         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
488
489         for (i=0;i<16;i++)
490                 {
491 #ifdef B_ENDIAN
492                 T1 = X[i] = W[i];
493 #else
494                 T1 = X[i] = PULL64(W[i]);
495 #endif
496                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
497                 T2 = Sigma0(a) + Maj(a,b,c);
498                 h = g;  g = f;  f = e;  e = d + T1;
499                 d = c;  c = b;  b = a;  a = T1 + T2;
500                 }
501
502         for (;i<80;i++)
503                 {
504                 s0 = X[(i+1)&0x0f];     s0 = sigma0(s0);
505                 s1 = X[(i+14)&0x0f];    s1 = sigma1(s1);
506
507                 T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
508                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
509                 T2 = Sigma0(a) + Maj(a,b,c);
510                 h = g;  g = f;  f = e;  e = d + T1;
511                 d = c;  c = b;  b = a;  a = T1 + T2;
512                 }
513
514         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
515         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
516
517                         W+=SHA_LBLOCK;
518                         }
519         }
520
521 #else
522
523 #define ROUND_00_15(i,a,b,c,d,e,f,g,h)          do {    \
524         T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
525         h = Sigma0(a) + Maj(a,b,c);                     \
526         d += T1;        h += T1;                } while (0)
527
528 #define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)      do {    \
529         s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
530         s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
531         T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
532         ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
533
534 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
535         {
536         const SHA_LONG64 *W=in;
537         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1;
538         SHA_LONG64      X[16];
539         int i;
540
541                         while (num--) {
542
543         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
544         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
545
546 #ifdef B_ENDIAN
547         T1 = X[0] = W[0];       ROUND_00_15(0,a,b,c,d,e,f,g,h);
548         T1 = X[1] = W[1];       ROUND_00_15(1,h,a,b,c,d,e,f,g);
549         T1 = X[2] = W[2];       ROUND_00_15(2,g,h,a,b,c,d,e,f);
550         T1 = X[3] = W[3];       ROUND_00_15(3,f,g,h,a,b,c,d,e);
551         T1 = X[4] = W[4];       ROUND_00_15(4,e,f,g,h,a,b,c,d);
552         T1 = X[5] = W[5];       ROUND_00_15(5,d,e,f,g,h,a,b,c);
553         T1 = X[6] = W[6];       ROUND_00_15(6,c,d,e,f,g,h,a,b);
554         T1 = X[7] = W[7];       ROUND_00_15(7,b,c,d,e,f,g,h,a);
555         T1 = X[8] = W[8];       ROUND_00_15(8,a,b,c,d,e,f,g,h);
556         T1 = X[9] = W[9];       ROUND_00_15(9,h,a,b,c,d,e,f,g);
557         T1 = X[10] = W[10];     ROUND_00_15(10,g,h,a,b,c,d,e,f);
558         T1 = X[11] = W[11];     ROUND_00_15(11,f,g,h,a,b,c,d,e);
559         T1 = X[12] = W[12];     ROUND_00_15(12,e,f,g,h,a,b,c,d);
560         T1 = X[13] = W[13];     ROUND_00_15(13,d,e,f,g,h,a,b,c);
561         T1 = X[14] = W[14];     ROUND_00_15(14,c,d,e,f,g,h,a,b);
562         T1 = X[15] = W[15];     ROUND_00_15(15,b,c,d,e,f,g,h,a);
563 #else
564         T1 = X[0]  = PULL64(W[0]);      ROUND_00_15(0,a,b,c,d,e,f,g,h);
565         T1 = X[1]  = PULL64(W[1]);      ROUND_00_15(1,h,a,b,c,d,e,f,g);
566         T1 = X[2]  = PULL64(W[2]);      ROUND_00_15(2,g,h,a,b,c,d,e,f);
567         T1 = X[3]  = PULL64(W[3]);      ROUND_00_15(3,f,g,h,a,b,c,d,e);
568         T1 = X[4]  = PULL64(W[4]);      ROUND_00_15(4,e,f,g,h,a,b,c,d);
569         T1 = X[5]  = PULL64(W[5]);      ROUND_00_15(5,d,e,f,g,h,a,b,c);
570         T1 = X[6]  = PULL64(W[6]);      ROUND_00_15(6,c,d,e,f,g,h,a,b);
571         T1 = X[7]  = PULL64(W[7]);      ROUND_00_15(7,b,c,d,e,f,g,h,a);
572         T1 = X[8]  = PULL64(W[8]);      ROUND_00_15(8,a,b,c,d,e,f,g,h);
573         T1 = X[9]  = PULL64(W[9]);      ROUND_00_15(9,h,a,b,c,d,e,f,g);
574         T1 = X[10] = PULL64(W[10]);     ROUND_00_15(10,g,h,a,b,c,d,e,f);
575         T1 = X[11] = PULL64(W[11]);     ROUND_00_15(11,f,g,h,a,b,c,d,e);
576         T1 = X[12] = PULL64(W[12]);     ROUND_00_15(12,e,f,g,h,a,b,c,d);
577         T1 = X[13] = PULL64(W[13]);     ROUND_00_15(13,d,e,f,g,h,a,b,c);
578         T1 = X[14] = PULL64(W[14]);     ROUND_00_15(14,c,d,e,f,g,h,a,b);
579         T1 = X[15] = PULL64(W[15]);     ROUND_00_15(15,b,c,d,e,f,g,h,a);
580 #endif
581
582         for (i=16;i<80;i+=16)
583                 {
584                 ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X);
585                 ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X);
586                 ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X);
587                 ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X);
588                 ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X);
589                 ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X);
590                 ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X);
591                 ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X);
592                 ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X);
593                 ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X);
594                 ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X);
595                 ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X);
596                 ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X);
597                 ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X);
598                 ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X);
599                 ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X);
600                 }
601
602         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
603         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
604
605                         W+=SHA_LBLOCK;
606                         }
607         }
608
609 #endif
610
611 #endif /* SHA512_ASM */
612
613 #else /* !OPENSSL_NO_SHA512 */
614
/*
 * Reached when OPENSSL_NO_SHA or OPENSSL_NO_SHA512 is defined, i.e. when none
 * of the SHA-512 code above is compiled.  For the listed configurations a
 * self-referencing static pointer is defined.
 * NOTE(review): presumably this keeps the translation unit from being empty,
 * which some compilers reject or warn about -- confirm.
 */
615 #if defined(PEDANTIC) || defined(__DECC) || defined(OPENSSL_SYS_MACOSX)
616 static void *dummy=&dummy;
617 #endif
618
619 #endif /* !OPENSSL_NO_SHA512 */