Initial aarch64 bits.
[openssl.git] / crypto / sha / sha512.c
1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3  * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
4  * according to the OpenSSL license [found in ../../LICENSE].
5  * ====================================================================
6  */
7 #include <openssl/opensslconf.h>
8 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
9 /*
10  * IMPLEMENTATION NOTES.
11  *
12  * As you might have noticed 32-bit hash algorithms:
13  *
14  * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
15  * - optimized versions implement two transform functions: one operating
16  *   on [aligned] data in host byte order and one - on data in input
17  *   stream byte order;
18  * - share common byte-order neutral collector and padding function
19  *   implementations, ../md32_common.h;
20  *
21  * None of the above applies to this SHA-512 implementation. Reasons
22  * [in reverse order] are:
23  *
24  * - it's the only 64-bit hash algorithm for the moment of this writing,
25  *   there is no need for common collector/padding implementation [yet];
26  * - by supporting only one transform function [which operates on
27  *   *aligned* data in input stream byte order, big-endian in this case]
28  *   we minimize burden of maintenance in two ways: a) collector/padding
29  *   function is simpler; b) only one transform function to stare at;
30  * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
31  *   apply a number of optimizations to mitigate potential performance
32  *   penalties caused by previous design decision;
33  *
34  * Caveat lector.
35  *
36  * Implementation relies on the fact that "long long" is 64-bit on
37  * both 32- and 64-bit platforms. If some compiler vendor comes up
38  * with 128-bit long long, adjustment to sha.h would be required.
39  * As this implementation relies on 64-bit integer type, it's totally
40  * inappropriate for platforms which don't support it, most notably
41  * 16-bit platforms.
42  *                                      <appro@fy.chalmers.se>
43  */
44 #include <stdlib.h>
45 #include <string.h>
46
47 #include <openssl/crypto.h>
48 #include <openssl/sha.h>
49 #include <openssl/opensslv.h>
50
51 #include "cryptlib.h"
52
53 __fips_constseg
54 const char SHA512_version[]="SHA-512" OPENSSL_VERSION_PTEXT;
55
56 #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
57     defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
58     defined(__s390__) || defined(__s390x__) || \
59     defined(__aarch64__) || \
60     defined(SHA512_ASM)
61 #define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
62 #endif
63
64 int SHA384_Init (SHA512_CTX *c)
65         {
66         c->h[0]=U64(0xcbbb9d5dc1059ed8);
67         c->h[1]=U64(0x629a292a367cd507);
68         c->h[2]=U64(0x9159015a3070dd17);
69         c->h[3]=U64(0x152fecd8f70e5939);
70         c->h[4]=U64(0x67332667ffc00b31);
71         c->h[5]=U64(0x8eb44a8768581511);
72         c->h[6]=U64(0xdb0c2e0d64f98fa7);
73         c->h[7]=U64(0x47b5481dbefa4fa4);
74
75         c->Nl=0;        c->Nh=0;
76         c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
77         return 1;
78         }
79
80 int SHA512_Init (SHA512_CTX *c)
81         {
82         c->h[0]=U64(0x6a09e667f3bcc908);
83         c->h[1]=U64(0xbb67ae8584caa73b);
84         c->h[2]=U64(0x3c6ef372fe94f82b);
85         c->h[3]=U64(0xa54ff53a5f1d36f1);
86         c->h[4]=U64(0x510e527fade682d1);
87         c->h[5]=U64(0x9b05688c2b3e6c1f);
88         c->h[6]=U64(0x1f83d9abfb41bd6b);
89         c->h[7]=U64(0x5be0cd19137e2179);
90
91         c->Nl=0;        c->Nh=0;
92         c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
93         return 1;
94         }
95
96 #ifndef SHA512_ASM
97 static
98 #endif
99 void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
100
101 int SHA512_Final (unsigned char *md, SHA512_CTX *c)
102         {
103         unsigned char *p=(unsigned char *)c->u.p;
104         size_t n=c->num;
105
106         p[n]=0x80;      /* There always is a room for one */
107         n++;
108         if (n > (sizeof(c->u)-16))
109                 memset (p+n,0,sizeof(c->u)-n), n=0,
110                 sha512_block_data_order (c,p,1);
111
112         memset (p+n,0,sizeof(c->u)-16-n);
113 #ifdef  B_ENDIAN
114         c->u.d[SHA_LBLOCK-2] = c->Nh;
115         c->u.d[SHA_LBLOCK-1] = c->Nl;
116 #else
117         p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
118         p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
119         p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
120         p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
121         p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
122         p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
123         p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
124         p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
125         p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
126         p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
127         p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
128         p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
129         p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
130         p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
131         p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
132         p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
133 #endif
134
135         sha512_block_data_order (c,p,1);
136
137         if (md==0) return 0;
138
139         switch (c->md_len)
140                 {
141                 /* Let compiler decide if it's appropriate to unroll... */
142                 case SHA384_DIGEST_LENGTH:
143                         for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
144                                 {
145                                 SHA_LONG64 t = c->h[n];
146
147                                 *(md++) = (unsigned char)(t>>56);
148                                 *(md++) = (unsigned char)(t>>48);
149                                 *(md++) = (unsigned char)(t>>40);
150                                 *(md++) = (unsigned char)(t>>32);
151                                 *(md++) = (unsigned char)(t>>24);
152                                 *(md++) = (unsigned char)(t>>16);
153                                 *(md++) = (unsigned char)(t>>8);
154                                 *(md++) = (unsigned char)(t);
155                                 }
156                         break;
157                 case SHA512_DIGEST_LENGTH:
158                         for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
159                                 {
160                                 SHA_LONG64 t = c->h[n];
161
162                                 *(md++) = (unsigned char)(t>>56);
163                                 *(md++) = (unsigned char)(t>>48);
164                                 *(md++) = (unsigned char)(t>>40);
165                                 *(md++) = (unsigned char)(t>>32);
166                                 *(md++) = (unsigned char)(t>>24);
167                                 *(md++) = (unsigned char)(t>>16);
168                                 *(md++) = (unsigned char)(t>>8);
169                                 *(md++) = (unsigned char)(t);
170                                 }
171                         break;
172                 /* ... as well as make sure md_len is not abused. */
173                 default:        return 0;
174                 }
175
176         return 1;
177         }
178
179 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
180 {   return SHA512_Final (md,c);   }
181
182 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
183         {
184         SHA_LONG64      l;
185         unsigned char  *p=c->u.p;
186         const unsigned char *data=(const unsigned char *)_data;
187
188         if (len==0) return  1;
189
190         l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
191         if (l < c->Nl)          c->Nh++;
192         if (sizeof(len)>=8)     c->Nh+=(((SHA_LONG64)len)>>61);
193         c->Nl=l;
194
195         if (c->num != 0)
196                 {
197                 size_t n = sizeof(c->u) - c->num;
198
199                 if (len < n)
200                         {
201                         memcpy (p+c->num,data,len), c->num += (unsigned int)len;
202                         return 1;
203                         }
204                 else    {
205                         memcpy (p+c->num,data,n), c->num = 0;
206                         len-=n, data+=n;
207                         sha512_block_data_order (c,p,1);
208                         }
209                 }
210
211         if (len >= sizeof(c->u))
212                 {
213 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
214                 if ((size_t)data%sizeof(c->u.d[0]) != 0)
215                         while (len >= sizeof(c->u))
216                                 memcpy (p,data,sizeof(c->u)),
217                                 sha512_block_data_order (c,p,1),
218                                 len  -= sizeof(c->u),
219                                 data += sizeof(c->u);
220                 else
221 #endif
222                         sha512_block_data_order (c,data,len/sizeof(c->u)),
223                         data += len,
224                         len  %= sizeof(c->u),
225                         data -= len;
226                 }
227
228         if (len != 0)   memcpy (p,data,len), c->num = (int)len;
229
230         return 1;
231         }
232
233 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
234 {   return SHA512_Update (c,data,len);   }
235
236 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
237 {   sha512_block_data_order (c,data,1);  }
238
239 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
240         {
241         SHA512_CTX c;
242         static unsigned char m[SHA384_DIGEST_LENGTH];
243
244         if (md == NULL) md=m;
245         SHA384_Init(&c);
246         SHA512_Update(&c,d,n);
247         SHA512_Final(md,&c);
248         OPENSSL_cleanse(&c,sizeof(c));
249         return(md);
250         }
251
252 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
253         {
254         SHA512_CTX c;
255         static unsigned char m[SHA512_DIGEST_LENGTH];
256
257         if (md == NULL) md=m;
258         SHA512_Init(&c);
259         SHA512_Update(&c,d,n);
260         SHA512_Final(md,&c);
261         OPENSSL_cleanse(&c,sizeof(c));
262         return(md);
263         }
264
265 #ifndef SHA512_ASM
266 __fips_constseg
267 static const SHA_LONG64 K512[80] = {
268         U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
269         U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
270         U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
271         U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
272         U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
273         U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
274         U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
275         U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
276         U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
277         U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
278         U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
279         U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
280         U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
281         U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
282         U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
283         U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
284         U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
285         U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
286         U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
287         U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
288         U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
289         U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
290         U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
291         U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
292         U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
293         U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
294         U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
295         U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
296         U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
297         U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
298         U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
299         U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
300         U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
301         U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
302         U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
303         U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
304         U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
305         U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
306         U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
307         U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
308
/*
 * Compiler/CPU specific versions of two primitives:
 *
 *   ROTR(a,n)  - rotate the 64-bit value a right by the constant n;
 *   PULL64(x)  - fetch the 64-bit word stored at &x in big-endian byte
 *                order and return it in host order.
 *
 * Whatever is left undefined here gets a portable definition further
 * down.  Defining PEDANTIC disables this whole section.
 */
#ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#  if defined(__x86_64) || defined(__x86_64__)
#   define ROTR(a,n)    ({ SHA_LONG64 out_;                     \
                           asm ("rorq %1,%0"                    \
                                : "=r"(out_)                    \
                                : "J"(n),"0"(a)                 \
                                : "cc");                        \
                           out_;                                })
#   if !defined(B_ENDIAN)
#    define PULL64(x)   ({ SHA_LONG64 out_=*((const SHA_LONG64 *)(&(x))); \
                           asm ("bswapq    %0"                  \
                                : "=r"(out_)                    \
                                : "0"(out_));                   \
                           out_;                                })
#   endif
#  elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#   if defined(I386_ONLY)
     /* Byte swap built from xchgb/roll instead of bswapl. */
#    define PULL64(x)   ({ const unsigned int *w32_=(const unsigned int *)(&(x)); \
                           unsigned int hi=w32_[0],lo=w32_[1];  \
                           asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                               "roll $16,%%eax; roll $16,%%edx; "\
                               "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
                                : "=a"(lo),"=d"(hi)             \
                                : "0"(lo),"1"(hi) : "cc");      \
                           ((SHA_LONG64)hi)<<32|lo;             })
#   else
#    define PULL64(x)   ({ const unsigned int *w32_=(const unsigned int *)(&(x)); \
                           unsigned int hi=w32_[0],lo=w32_[1];  \
                           asm ("bswapl %0; bswapl %1;"         \
                                : "=r"(lo),"=r"(hi)             \
                                : "0"(lo),"1"(hi));             \
                           ((SHA_LONG64)hi)<<32|lo;             })
#   endif
#  elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
#   define ROTR(a,n)    ({ SHA_LONG64 out_;                     \
                           asm ("rotrdi %0,%1,%2"               \
                                : "=r"(out_)                    \
                                : "r"(a),"K"(n));               \
                           out_;                                })
#  elif defined(__aarch64__)
#   define ROTR(a,n)    ({ SHA_LONG64 out_;                     \
                           asm ("ror %0,%1,%2"                  \
                                : "=r"(out_)                    \
                                : "r"(a),"I"(n));               \
                           out_;                                })
#   if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
#    define PULL64(x)   ({ SHA_LONG64 out_;                     \
                           asm ("rev       %0,%1"               \
                                : "=r"(out_)                    \
                                : "r"(*((const SHA_LONG64 *)(&(x))))); \
                           out_;                                })
#   endif
#  endif
# elif defined(_MSC_VER)
#  if defined(_WIN64)   /* applies to both IA-64 and AMD64 */
#   pragma intrinsic(_rotr64)
#   define ROTR(a,n)    _rotr64((a),n)
#  endif
#  if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
    /*
     * __pull64be: __fastcall helper backing PULL64 on 32-bit MSVC.  The
     * body is inline assembly only: it loads the two 32-bit halves at
     * [ecx] into edx and eax and byte-swaps each of them.
     */
#   if defined(I386_ONLY)
    static SHA_LONG64 __fastcall __pull64be(const void *x)
    {
        _asm    mov     edx, [ecx + 0]
        _asm    mov     eax, [ecx + 4]
        _asm    xchg    dh,dl
        _asm    xchg    ah,al
        _asm    rol     edx,16
        _asm    rol     eax,16
        _asm    xchg    dh,dl
        _asm    xchg    ah,al
    }
#   else
    static SHA_LONG64 __fastcall __pull64be(const void *x)
    {
        _asm    mov     edx, [ecx + 0]
        _asm    mov     eax, [ecx + 4]
        _asm    bswap   edx
        _asm    bswap   eax
    }
#   endif
#   define PULL64(x) __pull64be(&(x))
#   if _MSC_VER<=1200
#    pragma inline_depth(0)
#   endif
#  endif
# endif
#endif
391
/*
 * Portable fallbacks, used when no compiler/CPU specific version was
 * defined earlier.
 *
 * B(x,j)     byte j (0 = first in memory) of the object x, shifted to
 *            the position it occupies in a big-endian 64-bit word.
 * PULL64(x)  the 64-bit big-endian word stored at &x, assembled byte by
 *            byte, so it works for any host byte order and alignment.
 *            x must be an lvalue; it is evaluated eight times.
 * ROTR(x,s)  x rotated right by s bits, 0 < s < 64; x is expected to be
 *            an unsigned 64-bit value and is evaluated twice.
 *
 * Every macro argument is parenthesised so that an expression argument
 * (e.g. ROTR(v, k+4)) is not regrouped by operator precedence.
 */
#ifndef PULL64
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#endif

#ifndef ROTR
#define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
#endif

/* The four rotate/shift mixing functions of the round and the schedule. */
#define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
#define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
#define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

/* Ch: each bit of x selects the bit of y (1) or z (0).  Maj: bitwise majority. */
#define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
#define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
408
409
410 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
411 /*
412  * This code should give better results on 32-bit CPU with less than
413  * ~24 registers, both size and performance wise...
414  */
415 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
416         {
417         const SHA_LONG64 *W=in;
418         SHA_LONG64      A,E,T;
419         SHA_LONG64      X[9+80],*F;
420         int i;
421
422                         while (num--) {
423
424         F    = X+80;
425         A    = ctx->h[0];       F[1] = ctx->h[1];
426         F[2] = ctx->h[2];       F[3] = ctx->h[3];
427         E    = ctx->h[4];       F[5] = ctx->h[5];
428         F[6] = ctx->h[6];       F[7] = ctx->h[7];
429
430         for (i=0;i<16;i++,F--)
431                 {
432 #ifdef B_ENDIAN
433                 T = W[i];
434 #else
435                 T = PULL64(W[i]);
436 #endif
437                 F[0] = A;
438                 F[4] = E;
439                 F[8] = T;
440                 T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
441                 E    = F[3] + T;
442                 A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
443                 }
444
445         for (;i<80;i++,F--)
446                 {
447                 T    = sigma0(F[8+16-1]);
448                 T   += sigma1(F[8+16-14]);
449                 T   += F[8+16] + F[8+16-9];
450
451                 F[0] = A;
452                 F[4] = E;
453                 F[8] = T;
454                 T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
455                 E    = F[3] + T;
456                 A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
457                 }
458
459         ctx->h[0] += A;         ctx->h[1] += F[1];
460         ctx->h[2] += F[2];      ctx->h[3] += F[3];
461         ctx->h[4] += E;         ctx->h[5] += F[5];
462         ctx->h[6] += F[6];      ctx->h[7] += F[7];
463
464                         W+=SHA_LBLOCK;
465                         }
466         }
467
468 #elif defined(OPENSSL_SMALL_FOOTPRINT)
469
470 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
471         {
472         const SHA_LONG64 *W=in;
473         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1,T2;
474         SHA_LONG64      X[16];
475         int i;
476
477                         while (num--) {
478
479         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
480         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
481
482         for (i=0;i<16;i++)
483                 {
484 #ifdef B_ENDIAN
485                 T1 = X[i] = W[i];
486 #else
487                 T1 = X[i] = PULL64(W[i]);
488 #endif
489                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
490                 T2 = Sigma0(a) + Maj(a,b,c);
491                 h = g;  g = f;  f = e;  e = d + T1;
492                 d = c;  c = b;  b = a;  a = T1 + T2;
493                 }
494
495         for (;i<80;i++)
496                 {
497                 s0 = X[(i+1)&0x0f];     s0 = sigma0(s0);
498                 s1 = X[(i+14)&0x0f];    s1 = sigma1(s1);
499
500                 T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
501                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
502                 T2 = Sigma0(a) + Maj(a,b,c);
503                 h = g;  g = f;  f = e;  e = d + T1;
504                 d = c;  c = b;  b = a;  a = T1 + T2;
505                 }
506
507         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
508         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
509
510                         W+=SHA_LBLOCK;
511                         }
512         }
513
514 #else
515
516 #define ROUND_00_15(i,a,b,c,d,e,f,g,h)          do {    \
517         T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
518         h = Sigma0(a) + Maj(a,b,c);                     \
519         d += T1;        h += T1;                } while (0)
520
521 #define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)      do {    \
522         s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
523         s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
524         T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
525         ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
526
527 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
528         {
529         const SHA_LONG64 *W=in;
530         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1;
531         SHA_LONG64      X[16];
532         int i;
533
534                         while (num--) {
535
536         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
537         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
538
539 #ifdef B_ENDIAN
540         T1 = X[0] = W[0];       ROUND_00_15(0,a,b,c,d,e,f,g,h);
541         T1 = X[1] = W[1];       ROUND_00_15(1,h,a,b,c,d,e,f,g);
542         T1 = X[2] = W[2];       ROUND_00_15(2,g,h,a,b,c,d,e,f);
543         T1 = X[3] = W[3];       ROUND_00_15(3,f,g,h,a,b,c,d,e);
544         T1 = X[4] = W[4];       ROUND_00_15(4,e,f,g,h,a,b,c,d);
545         T1 = X[5] = W[5];       ROUND_00_15(5,d,e,f,g,h,a,b,c);
546         T1 = X[6] = W[6];       ROUND_00_15(6,c,d,e,f,g,h,a,b);
547         T1 = X[7] = W[7];       ROUND_00_15(7,b,c,d,e,f,g,h,a);
548         T1 = X[8] = W[8];       ROUND_00_15(8,a,b,c,d,e,f,g,h);
549         T1 = X[9] = W[9];       ROUND_00_15(9,h,a,b,c,d,e,f,g);
550         T1 = X[10] = W[10];     ROUND_00_15(10,g,h,a,b,c,d,e,f);
551         T1 = X[11] = W[11];     ROUND_00_15(11,f,g,h,a,b,c,d,e);
552         T1 = X[12] = W[12];     ROUND_00_15(12,e,f,g,h,a,b,c,d);
553         T1 = X[13] = W[13];     ROUND_00_15(13,d,e,f,g,h,a,b,c);
554         T1 = X[14] = W[14];     ROUND_00_15(14,c,d,e,f,g,h,a,b);
555         T1 = X[15] = W[15];     ROUND_00_15(15,b,c,d,e,f,g,h,a);
556 #else
557         T1 = X[0]  = PULL64(W[0]);      ROUND_00_15(0,a,b,c,d,e,f,g,h);
558         T1 = X[1]  = PULL64(W[1]);      ROUND_00_15(1,h,a,b,c,d,e,f,g);
559         T1 = X[2]  = PULL64(W[2]);      ROUND_00_15(2,g,h,a,b,c,d,e,f);
560         T1 = X[3]  = PULL64(W[3]);      ROUND_00_15(3,f,g,h,a,b,c,d,e);
561         T1 = X[4]  = PULL64(W[4]);      ROUND_00_15(4,e,f,g,h,a,b,c,d);
562         T1 = X[5]  = PULL64(W[5]);      ROUND_00_15(5,d,e,f,g,h,a,b,c);
563         T1 = X[6]  = PULL64(W[6]);      ROUND_00_15(6,c,d,e,f,g,h,a,b);
564         T1 = X[7]  = PULL64(W[7]);      ROUND_00_15(7,b,c,d,e,f,g,h,a);
565         T1 = X[8]  = PULL64(W[8]);      ROUND_00_15(8,a,b,c,d,e,f,g,h);
566         T1 = X[9]  = PULL64(W[9]);      ROUND_00_15(9,h,a,b,c,d,e,f,g);
567         T1 = X[10] = PULL64(W[10]);     ROUND_00_15(10,g,h,a,b,c,d,e,f);
568         T1 = X[11] = PULL64(W[11]);     ROUND_00_15(11,f,g,h,a,b,c,d,e);
569         T1 = X[12] = PULL64(W[12]);     ROUND_00_15(12,e,f,g,h,a,b,c,d);
570         T1 = X[13] = PULL64(W[13]);     ROUND_00_15(13,d,e,f,g,h,a,b,c);
571         T1 = X[14] = PULL64(W[14]);     ROUND_00_15(14,c,d,e,f,g,h,a,b);
572         T1 = X[15] = PULL64(W[15]);     ROUND_00_15(15,b,c,d,e,f,g,h,a);
573 #endif
574
575         for (i=16;i<80;i+=16)
576                 {
577                 ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X);
578                 ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X);
579                 ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X);
580                 ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X);
581                 ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X);
582                 ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X);
583                 ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X);
584                 ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X);
585                 ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X);
586                 ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X);
587                 ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X);
588                 ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X);
589                 ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X);
590                 ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X);
591                 ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X);
592                 ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X);
593                 }
594
595         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
596         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
597
598                         W+=SHA_LBLOCK;
599                         }
600         }
601
602 #endif
603
604 #endif /* SHA512_ASM */
605
606 #else /* !OPENSSL_NO_SHA512 */
607
608 #if defined(PEDANTIC) || defined(__DECC) || defined(OPENSSL_SYS_MACOSX)
609 static void *dummy=&dummy;
610 #endif
611
612 #endif /* !OPENSSL_NO_SHA512 */