832cae24f186bb9f6b66bd24fdff54f6612fb4c9
[openssl.git] / crypto / sha / sha512.c
1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3  * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
4  * according to the OpenSSL license [found in ../../LICENSE].
5  * ====================================================================
6  */
7 #include <openssl/opensslconf.h>
8 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
9 /*
10  * IMPLEMENTATION NOTES.
11  *
12  * As you might have noticed 32-bit hash algorithms:
13  *
14  * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
15  * - optimized versions implement two transform functions: one operating
16  *   on [aligned] data in host byte order and one - on data in input
17  *   stream byte order;
18  * - share common byte-order neutral collector and padding function
19  *   implementations, ../md32_common.h;
20  *
21  * None of the above applies to this SHA-512 implementation. Reasons
22  * [in reverse order] are:
23  *
24  * - it's the only 64-bit hash algorithm at the time of this writing,
25  *   there is no need for common collector/padding implementation [yet];
26  * - by supporting only one transform function [which operates on
27  *   *aligned* data in input stream byte order, big-endian in this case]
28  *   we minimize burden of maintenance in two ways: a) collector/padding
29  *   function is simpler; b) only one transform function to stare at;
30  * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
31  *   apply a number of optimizations to mitigate potential performance
32  *   penalties caused by previous design decision;
33  *
34  * Caveat lector.
35  *
36  * Implementation relies on the fact that "long long" is 64-bit on
37  * both 32- and 64-bit platforms. If some compiler vendor comes up
38  * with 128-bit long long, adjustment to sha.h would be required.
39  * As this implementation relies on 64-bit integer type, it's totally
40  * inappropriate for platforms which don't support it, most notably
41  * 16-bit platforms.
42  *                                      <appro@fy.chalmers.se>
43  */
44 #include <stdlib.h>
45 #include <string.h>
46
47 #include <openssl/crypto.h>
48 #include <openssl/sha.h>
49 #include <openssl/opensslv.h>
50
51 #include "cryptlib.h"
52
53 const char SHA512_version[]="SHA-512" OPENSSL_VERSION_PTEXT;
54
55 #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
56     defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
57     defined(__s390__) || defined(__s390x__) || \
58     defined(SHA512_ASM)
59 #define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
60 #endif
61
62 int SHA384_Init (SHA512_CTX *c)
63         {
64 #if defined(SHA512_ASM) && (defined(__arm__) || defined(__arm))
65         /* maintain dword order required by assembler module */
66         unsigned int *h = (unsigned int *)c->h;
67
68         h[0]  = 0xcbbb9d5d; h[1]  = 0xc1059ed8;
69         h[2]  = 0x629a292a; h[3]  = 0x367cd507;
70         h[4]  = 0x9159015a; h[5]  = 0x3070dd17;
71         h[6]  = 0x152fecd8; h[7]  = 0xf70e5939;
72         h[8]  = 0x67332667; h[9]  = 0xffc00b31;
73         h[10] = 0x8eb44a87; h[11] = 0x68581511;
74         h[12] = 0xdb0c2e0d; h[13] = 0x64f98fa7;
75         h[14] = 0x47b5481d; h[15] = 0xbefa4fa4;
76 #else
77         c->h[0]=U64(0xcbbb9d5dc1059ed8);
78         c->h[1]=U64(0x629a292a367cd507);
79         c->h[2]=U64(0x9159015a3070dd17);
80         c->h[3]=U64(0x152fecd8f70e5939);
81         c->h[4]=U64(0x67332667ffc00b31);
82         c->h[5]=U64(0x8eb44a8768581511);
83         c->h[6]=U64(0xdb0c2e0d64f98fa7);
84         c->h[7]=U64(0x47b5481dbefa4fa4);
85 #endif
86         c->Nl=0;        c->Nh=0;
87         c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
88         return 1;
89         }
90
91 int SHA512_Init (SHA512_CTX *c)
92         {
93 #if defined(SHA512_ASM) && (defined(__arm__) || defined(__arm))
94         /* maintain dword order required by assembler module */
95         unsigned int *h = (unsigned int *)c->h;
96
97         h[0]  = 0x6a09e667; h[1]  = 0xf3bcc908;
98         h[2]  = 0xbb67ae85; h[3]  = 0x84caa73b;
99         h[4]  = 0x3c6ef372; h[5]  = 0xfe94f82b;
100         h[6]  = 0xa54ff53a; h[7]  = 0x5f1d36f1;
101         h[8]  = 0x510e527f; h[9]  = 0xade682d1;
102         h[10] = 0x9b05688c; h[11] = 0x2b3e6c1f;
103         h[12] = 0x1f83d9ab; h[13] = 0xfb41bd6b;
104         h[14] = 0x5be0cd19; h[15] = 0x137e2179;
105 #else
106         c->h[0]=U64(0x6a09e667f3bcc908);
107         c->h[1]=U64(0xbb67ae8584caa73b);
108         c->h[2]=U64(0x3c6ef372fe94f82b);
109         c->h[3]=U64(0xa54ff53a5f1d36f1);
110         c->h[4]=U64(0x510e527fade682d1);
111         c->h[5]=U64(0x9b05688c2b3e6c1f);
112         c->h[6]=U64(0x1f83d9abfb41bd6b);
113         c->h[7]=U64(0x5be0cd19137e2179);
114 #endif
115         c->Nl=0;        c->Nh=0;
116         c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
117         return 1;
118         }
119
120 #ifndef SHA512_ASM
121 static
122 #endif
123 void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
124
125 int SHA512_Final (unsigned char *md, SHA512_CTX *c)
126         {
127         unsigned char *p=(unsigned char *)c->u.p;
128         size_t n=c->num;
129
130         p[n]=0x80;      /* There always is a room for one */
131         n++;
132         if (n > (sizeof(c->u)-16))
133                 memset (p+n,0,sizeof(c->u)-n), n=0,
134                 sha512_block_data_order (c,p,1);
135
136         memset (p+n,0,sizeof(c->u)-16-n);
137 #ifdef  B_ENDIAN
138         c->u.d[SHA_LBLOCK-2] = c->Nh;
139         c->u.d[SHA_LBLOCK-1] = c->Nl;
140 #else
141         p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
142         p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
143         p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
144         p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
145         p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
146         p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
147         p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
148         p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
149         p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
150         p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
151         p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
152         p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
153         p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
154         p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
155         p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
156         p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
157 #endif
158
159         sha512_block_data_order (c,p,1);
160
161         if (md==0) return 0;
162
163 #if defined(SHA512_ASM) && (defined(__arm__) || defined(__arm))
164         /* recall assembler dword order... */
165         n = c->md_len;
166         if (n == SHA384_DIGEST_LENGTH || n == SHA512_DIGEST_LENGTH)
167                 {
168                 unsigned int *h = (unsigned int *)c->h, t;
169
170                 for (n/=4;n;n--)
171                         {
172                         t = *(h++);
173                         *(md++) = (unsigned char)(t>>24);
174                         *(md++) = (unsigned char)(t>>16);
175                         *(md++) = (unsigned char)(t>>8);
176                         *(md++) = (unsigned char)(t);
177                         }
178                 }
179         else    return 0;
180 #else
181         switch (c->md_len)
182                 {
183                 /* Let compiler decide if it's appropriate to unroll... */
184                 case SHA384_DIGEST_LENGTH:
185                         for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
186                                 {
187                                 SHA_LONG64 t = c->h[n];
188
189                                 *(md++) = (unsigned char)(t>>56);
190                                 *(md++) = (unsigned char)(t>>48);
191                                 *(md++) = (unsigned char)(t>>40);
192                                 *(md++) = (unsigned char)(t>>32);
193                                 *(md++) = (unsigned char)(t>>24);
194                                 *(md++) = (unsigned char)(t>>16);
195                                 *(md++) = (unsigned char)(t>>8);
196                                 *(md++) = (unsigned char)(t);
197                                 }
198                         break;
199                 case SHA512_DIGEST_LENGTH:
200                         for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
201                                 {
202                                 SHA_LONG64 t = c->h[n];
203
204                                 *(md++) = (unsigned char)(t>>56);
205                                 *(md++) = (unsigned char)(t>>48);
206                                 *(md++) = (unsigned char)(t>>40);
207                                 *(md++) = (unsigned char)(t>>32);
208                                 *(md++) = (unsigned char)(t>>24);
209                                 *(md++) = (unsigned char)(t>>16);
210                                 *(md++) = (unsigned char)(t>>8);
211                                 *(md++) = (unsigned char)(t);
212                                 }
213                         break;
214                 /* ... as well as make sure md_len is not abused. */
215                 default:        return 0;
216                 }
217 #endif
218         return 1;
219         }
220
221 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
222 {   return SHA512_Final (md,c);   }
223
224 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
225         {
226         SHA_LONG64      l;
227         unsigned char  *p=c->u.p;
228         const unsigned char *data=(const unsigned char *)_data;
229
230         if (len==0) return  1;
231
232         l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
233         if (l < c->Nl)          c->Nh++;
234         if (sizeof(len)>=8)     c->Nh+=(((SHA_LONG64)len)>>61);
235         c->Nl=l;
236
237         if (c->num != 0)
238                 {
239                 size_t n = sizeof(c->u) - c->num;
240
241                 if (len < n)
242                         {
243                         memcpy (p+c->num,data,len), c->num += len;
244                         return 1;
245                         }
246                 else    {
247                         memcpy (p+c->num,data,n), c->num = 0;
248                         len-=n, data+=n;
249                         sha512_block_data_order (c,p,1);
250                         }
251                 }
252
253         if (len >= sizeof(c->u))
254                 {
255 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
256                 if ((size_t)data%sizeof(c->u.d[0]) != 0)
257                         while (len >= sizeof(c->u))
258                                 memcpy (p,data,sizeof(c->u)),
259                                 sha512_block_data_order (c,p,1),
260                                 len  -= sizeof(c->u),
261                                 data += sizeof(c->u);
262                 else
263 #endif
264                         sha512_block_data_order (c,data,len/sizeof(c->u)),
265                         data += len,
266                         len  %= sizeof(c->u),
267                         data -= len;
268                 }
269
270         if (len != 0)   memcpy (p,data,len), c->num = (int)len;
271
272         return 1;
273         }
274
275 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
276 {   return SHA512_Update (c,data,len);   }
277
278 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
279 {   sha512_block_data_order (c,data,1);  }
280
281 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
282         {
283         SHA512_CTX c;
284         static unsigned char m[SHA384_DIGEST_LENGTH];
285
286         if (md == NULL) md=m;
287         SHA384_Init(&c);
288         SHA512_Update(&c,d,n);
289         SHA512_Final(md,&c);
290         OPENSSL_cleanse(&c,sizeof(c));
291         return(md);
292         }
293
294 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
295         {
296         SHA512_CTX c;
297         static unsigned char m[SHA512_DIGEST_LENGTH];
298
299         if (md == NULL) md=m;
300         SHA512_Init(&c);
301         SHA512_Update(&c,d,n);
302         SHA512_Final(md,&c);
303         OPENSSL_cleanse(&c,sizeof(c));
304         return(md);
305         }
306
307 #ifndef SHA512_ASM
308 static const SHA_LONG64 K512[80] = {
309         U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
310         U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
311         U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
312         U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
313         U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
314         U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
315         U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
316         U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
317         U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
318         U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
319         U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
320         U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
321         U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
322         U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
323         U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
324         U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
325         U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
326         U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
327         U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
328         U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
329         U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
330         U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
331         U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
332         U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
333         U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
334         U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
335         U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
336         U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
337         U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
338         U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
339         U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
340         U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
341         U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
342         U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
343         U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
344         U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
345         U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
346         U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
347         U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
348         U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
349
/*
 * Compiler-specific fast paths, skipped entirely under PEDANTIC:
 *
 *   ROTR(a,n)  - rotate the 64-bit value a right by the constant n;
 *   PULL64(x)  - fetch the 8 bytes stored at x and return them
 *                byte-swapped (only defined where B_ENDIAN is not set).
 *
 * Whatever is left undefined here is supplied by the portable macros
 * that follow this section.
 */
#ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#  if defined(__x86_64) || defined(__x86_64__)
#   define ROTR(a,n)    ({ SHA_LONG64 ret;                      \
                           asm ("rorq %1,%0"                    \
                                : "=r"(ret)                     \
                                : "J"(n),"0"(a)                 \
                                : "cc");                        \
                           ret;                                 })
#   ifndef B_ENDIAN
#    define PULL64(x)   ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
                           asm ("bswapq    %0"                  \
                                : "=r"(ret)                     \
                                : "0"(ret));                    \
                           ret;                                 })
#   endif
#  elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#   ifdef I386_ONLY
     /* No bswap instruction available: swap bytes with xchg/rol. */
#    define PULL64(x)   ({ const unsigned int *p=(const unsigned int *)(&(x));\
                           unsigned int hi=p[0],lo=p[1];        \
                           asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                               "roll $16,%%eax; roll $16,%%edx; "\
                               "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
                                : "=a"(lo),"=d"(hi)             \
                                : "0"(lo),"1"(hi) : "cc");      \
                           ((SHA_LONG64)hi)<<32|lo;             })
#   else
#    define PULL64(x)   ({ const unsigned int *p=(const unsigned int *)(&(x));\
                           unsigned int hi=p[0],lo=p[1];        \
                           asm ("bswapl %0; bswapl %1;"         \
                                : "=r"(lo),"=r"(hi)             \
                                : "0"(lo),"1"(hi));             \
                           ((SHA_LONG64)hi)<<32|lo;             })
#   endif
#  elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
#   define ROTR(a,n)    ({ unsigned long ret;                   \
                           asm ("rotrdi %0,%1,%2"               \
                                : "=r"(ret)                     \
                                : "r"(a),"K"(n));               \
                           ret;                                 })
#  endif
# elif defined(_MSC_VER)
#  if defined(_WIN64)   /* applies to both IA-64 and AMD64 */
#   pragma intrinsic(_rotr64)
#   define ROTR(a,n)    _rotr64((a),n)
#  endif
#  if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
    /*
     * __fastcall delivers x in ecx; the byte-swapped halves are left in
     * edx:eax, which is where the 64-bit result is picked up.
     */
    static SHA_LONG64 __fastcall __pull64be(const void *x)
    {   _asm    mov     edx, [ecx + 0]
        _asm    mov     eax, [ecx + 4]
#   ifdef I386_ONLY
        _asm    xchg    dh,dl
        _asm    xchg    ah,al
        _asm    rol     edx,16
        _asm    rol     eax,16
        _asm    xchg    dh,dl
        _asm    xchg    ah,al
#   else
        _asm    bswap   edx
        _asm    bswap   eax
#   endif
    }
#   define PULL64(x) __pull64be(&(x))
#   if _MSC_VER<=1200
#    pragma inline_depth(0)
#   endif
#  endif
# endif
#endif
420
/*
 * Portable fallbacks, used when no compiler-specific version was
 * selected above.
 *
 * B(x,j) picks byte j of the object x and shifts it to the position it
 * occupies in a big-endian 64-bit word; PULL64(x) ORs all eight bytes
 * together, i.e. reads x as a big-endian 64-bit value regardless of
 * host byte order or alignment.
 *
 * Every macro parameter is parenthesised so that expression arguments
 * such as ROTR(v, a+b) expand with the intended precedence.
 */
#ifndef PULL64
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#endif

/* Rotate the 64-bit value x right by s bits (0 < s < 64). */
#ifndef ROTR
#define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
#endif

/* Rotate/shift mixing functions used by the rounds and message schedule. */
#define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
#define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
#define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

/* Ch: each bit of x selects the matching bit of y (1) or z (0). */
#define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
/* Maj: bitwise majority vote of x, y and z. */
#define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
437
438
439 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
440 /*
441  * This code should give better results on 32-bit CPU with less than
442  * ~24 registers, both size and performance wise...
443  */
444 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
445         {
446         const SHA_LONG64 *W=in;
447         SHA_LONG64      A,E,T;
448         SHA_LONG64      X[9+80],*F;
449         int i;
450
451                         while (num--) {
452
453         F    = X+80;
454         A    = ctx->h[0];       F[1] = ctx->h[1];
455         F[2] = ctx->h[2];       F[3] = ctx->h[3];
456         E    = ctx->h[4];       F[5] = ctx->h[5];
457         F[6] = ctx->h[6];       F[7] = ctx->h[7];
458
459         for (i=0;i<16;i++,F--)
460                 {
461 #ifdef B_ENDIAN
462                 T = W[i];
463 #else
464                 T = PULL64(W[i]);
465 #endif
466                 F[0] = A;
467                 F[4] = E;
468                 F[8] = T;
469                 T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
470                 E    = F[3] + T;
471                 A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
472                 }
473
474         for (;i<80;i++,F--)
475                 {
476                 T    = sigma0(F[8+16-1]);
477                 T   += sigma1(F[8+16-14]);
478                 T   += F[8+16] + F[8+16-9];
479
480                 F[0] = A;
481                 F[4] = E;
482                 F[8] = T;
483                 T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
484                 E    = F[3] + T;
485                 A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
486                 }
487
488         ctx->h[0] += A;         ctx->h[1] += F[1];
489         ctx->h[2] += F[2];      ctx->h[3] += F[3];
490         ctx->h[4] += E;         ctx->h[5] += F[5];
491         ctx->h[6] += F[6];      ctx->h[7] += F[7];
492
493                         W+=SHA_LBLOCK;
494                         }
495         }
496
497 #elif defined(OPENSSL_SMALL_FOOTPRINT)
498
499 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
500         {
501         const SHA_LONG64 *W=in;
502         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1,T2;
503         SHA_LONG64      X[16];
504         int i;
505
506                         while (num--) {
507
508         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
509         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
510
511         for (i=0;i<16;i++)
512                 {
513 #ifdef B_ENDIAN
514                 T1 = X[i] = W[i];
515 #else
516                 T1 = X[i] = PULL64(W[i]);
517 #endif
518                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
519                 T2 = Sigma0(a) + Maj(a,b,c);
520                 h = g;  g = f;  f = e;  e = d + T1;
521                 d = c;  c = b;  b = a;  a = T1 + T2;
522                 }
523
524         for (;i<80;i++)
525                 {
526                 s0 = X[(i+1)&0x0f];     s0 = sigma0(s0);
527                 s1 = X[(i+14)&0x0f];    s1 = sigma1(s1);
528
529                 T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
530                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
531                 T2 = Sigma0(a) + Maj(a,b,c);
532                 h = g;  g = f;  f = e;  e = d + T1;
533                 d = c;  c = b;  b = a;  a = T1 + T2;
534                 }
535
536         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
537         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
538
539                         W+=SHA_LBLOCK;
540                         }
541         }
542
543 #else
544
545 #define ROUND_00_15(i,a,b,c,d,e,f,g,h)          do {    \
546         T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
547         h = Sigma0(a) + Maj(a,b,c);                     \
548         d += T1;        h += T1;                } while (0)
549
550 #define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)      do {    \
551         s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
552         s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
553         T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
554         ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
555
556 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
557         {
558         const SHA_LONG64 *W=in;
559         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1;
560         SHA_LONG64      X[16];
561         int i;
562
563                         while (num--) {
564
565         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
566         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
567
568 #ifdef B_ENDIAN
569         T1 = X[0] = W[0];       ROUND_00_15(0,a,b,c,d,e,f,g,h);
570         T1 = X[1] = W[1];       ROUND_00_15(1,h,a,b,c,d,e,f,g);
571         T1 = X[2] = W[2];       ROUND_00_15(2,g,h,a,b,c,d,e,f);
572         T1 = X[3] = W[3];       ROUND_00_15(3,f,g,h,a,b,c,d,e);
573         T1 = X[4] = W[4];       ROUND_00_15(4,e,f,g,h,a,b,c,d);
574         T1 = X[5] = W[5];       ROUND_00_15(5,d,e,f,g,h,a,b,c);
575         T1 = X[6] = W[6];       ROUND_00_15(6,c,d,e,f,g,h,a,b);
576         T1 = X[7] = W[7];       ROUND_00_15(7,b,c,d,e,f,g,h,a);
577         T1 = X[8] = W[8];       ROUND_00_15(8,a,b,c,d,e,f,g,h);
578         T1 = X[9] = W[9];       ROUND_00_15(9,h,a,b,c,d,e,f,g);
579         T1 = X[10] = W[10];     ROUND_00_15(10,g,h,a,b,c,d,e,f);
580         T1 = X[11] = W[11];     ROUND_00_15(11,f,g,h,a,b,c,d,e);
581         T1 = X[12] = W[12];     ROUND_00_15(12,e,f,g,h,a,b,c,d);
582         T1 = X[13] = W[13];     ROUND_00_15(13,d,e,f,g,h,a,b,c);
583         T1 = X[14] = W[14];     ROUND_00_15(14,c,d,e,f,g,h,a,b);
584         T1 = X[15] = W[15];     ROUND_00_15(15,b,c,d,e,f,g,h,a);
585 #else
586         T1 = X[0]  = PULL64(W[0]);      ROUND_00_15(0,a,b,c,d,e,f,g,h);
587         T1 = X[1]  = PULL64(W[1]);      ROUND_00_15(1,h,a,b,c,d,e,f,g);
588         T1 = X[2]  = PULL64(W[2]);      ROUND_00_15(2,g,h,a,b,c,d,e,f);
589         T1 = X[3]  = PULL64(W[3]);      ROUND_00_15(3,f,g,h,a,b,c,d,e);
590         T1 = X[4]  = PULL64(W[4]);      ROUND_00_15(4,e,f,g,h,a,b,c,d);
591         T1 = X[5]  = PULL64(W[5]);      ROUND_00_15(5,d,e,f,g,h,a,b,c);
592         T1 = X[6]  = PULL64(W[6]);      ROUND_00_15(6,c,d,e,f,g,h,a,b);
593         T1 = X[7]  = PULL64(W[7]);      ROUND_00_15(7,b,c,d,e,f,g,h,a);
594         T1 = X[8]  = PULL64(W[8]);      ROUND_00_15(8,a,b,c,d,e,f,g,h);
595         T1 = X[9]  = PULL64(W[9]);      ROUND_00_15(9,h,a,b,c,d,e,f,g);
596         T1 = X[10] = PULL64(W[10]);     ROUND_00_15(10,g,h,a,b,c,d,e,f);
597         T1 = X[11] = PULL64(W[11]);     ROUND_00_15(11,f,g,h,a,b,c,d,e);
598         T1 = X[12] = PULL64(W[12]);     ROUND_00_15(12,e,f,g,h,a,b,c,d);
599         T1 = X[13] = PULL64(W[13]);     ROUND_00_15(13,d,e,f,g,h,a,b,c);
600         T1 = X[14] = PULL64(W[14]);     ROUND_00_15(14,c,d,e,f,g,h,a,b);
601         T1 = X[15] = PULL64(W[15]);     ROUND_00_15(15,b,c,d,e,f,g,h,a);
602 #endif
603
604         for (i=16;i<80;i+=16)
605                 {
606                 ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X);
607                 ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X);
608                 ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X);
609                 ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X);
610                 ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X);
611                 ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X);
612                 ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X);
613                 ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X);
614                 ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X);
615                 ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X);
616                 ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X);
617                 ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X);
618                 ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X);
619                 ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X);
620                 ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X);
621                 ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X);
622                 }
623
624         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
625         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
626
627                         W+=SHA_LBLOCK;
628                         }
629         }
630
631 #endif
632
633 #endif /* SHA512_ASM */
634
635 #else /* !OPENSSL_NO_SHA512 */
636
/*
 * With SHA-512 compiled out this file would otherwise contain no
 * declarations at all; for the configurations listed below a
 * self-referencing placeholder object is emitted instead.
 */
#if defined(PEDANTIC) || defined(__DECC) || defined(OPENSSL_SYS_MACOSX)
static void *dummy = &dummy;
#endif
640
641 #endif /* !OPENSSL_NO_SHA512 */