Commentary updates.
[openssl.git] / crypto / sha / sha512.c
1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3  * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
4  * according to the OpenSSL license [found in ../../LICENSE].
5  * ====================================================================
6  */
7 #include <openssl/opensslconf.h>
8 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
9 /*
10  * IMPLEMENTATION NOTES.
11  *
12  * As you might have noticed 32-bit hash algorithms:
13  *
14  * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
15  * - optimized versions implement two transform functions: one operating
16  *   on [aligned] data in host byte order and one - on data in input
17  *   stream byte order;
18  * - share common byte-order neutral collector and padding function
19  *   implementations, ../md32_common.h;
20  *
21  * None of the above applies to this SHA-512 implementation. Reasons
22  * [in reverse order] are:
23  *
24  * - it's the only 64-bit hash algorithm at the time of this writing,
25  *   there is no need for common collector/padding implementation [yet];
26  * - by supporting only one transform function [which operates on
27  *   *aligned* data in input stream byte order, big-endian in this case]
28  *   we minimize burden of maintenance in two ways: a) collector/padding
29  *   function is simpler; b) only one transform function to stare at;
30  * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
31  *   apply a number of optimizations to mitigate potential performance
32  *   penalties caused by previous design decision;
33  *
34  * Caveat lector.
35  *
36  * Implementation relies on the fact that "long long" is 64-bit on
37  * both 32- and 64-bit platforms. If some compiler vendor comes up
38  * with 128-bit long long, adjustment to sha.h would be required.
39  * As this implementation relies on 64-bit integer type, it's totally
40  * inappropriate for platforms which don't support it, most notably
41  * 16-bit platforms.
42  *                                      <appro@fy.chalmers.se>
43  */
44 #include <stdlib.h>
45 #include <string.h>
46
47 #include <openssl/crypto.h>
48 #include <openssl/sha.h>
49 #include <openssl/opensslv.h>
50
51 #include "cryptlib.h"
52
/* Version identification string for this module. */
const char SHA512_version[]="SHA-512" OPENSSL_VERSION_PTEXT;

/*
 * On the targets listed below, or whenever SHA512_ASM is defined,
 * SHA512_Update hands whole blocks of caller data straight to
 * sha512_block_data_order without first checking their alignment.
 * Without this macro, input that is not aligned to the size of c->u.d[0]
 * is first copied block by block into the context buffer.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
    defined(__s390__) || defined(__s390x__) || \
    defined(SHA512_ASM)
#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif
61
62 int SHA384_Init (SHA512_CTX *c)
63         {
64         c->h[0]=U64(0xcbbb9d5dc1059ed8);
65         c->h[1]=U64(0x629a292a367cd507);
66         c->h[2]=U64(0x9159015a3070dd17);
67         c->h[3]=U64(0x152fecd8f70e5939);
68         c->h[4]=U64(0x67332667ffc00b31);
69         c->h[5]=U64(0x8eb44a8768581511);
70         c->h[6]=U64(0xdb0c2e0d64f98fa7);
71         c->h[7]=U64(0x47b5481dbefa4fa4);
72         c->Nl=0;        c->Nh=0;
73         c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
74         return 1;
75         }
76
77 int SHA512_Init (SHA512_CTX *c)
78         {
79         c->h[0]=U64(0x6a09e667f3bcc908);
80         c->h[1]=U64(0xbb67ae8584caa73b);
81         c->h[2]=U64(0x3c6ef372fe94f82b);
82         c->h[3]=U64(0xa54ff53a5f1d36f1);
83         c->h[4]=U64(0x510e527fade682d1);
84         c->h[5]=U64(0x9b05688c2b3e6c1f);
85         c->h[6]=U64(0x1f83d9abfb41bd6b);
86         c->h[7]=U64(0x5be0cd19137e2179);
87         c->Nl=0;        c->Nh=0;
88         c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
89         return 1;
90         }
91
/*
 * Block transform: consumes |num| consecutive input blocks starting at
 * |in| and folds them into ctx->h. The function has internal linkage
 * when it is implemented in C further down in this file; when SHA512_ASM
 * is defined the C body is compiled out and the symbol is left external
 * (presumably provided by a separate assembler module).
 */
#ifndef SHA512_ASM
static
#endif
void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
96
97 int SHA512_Final (unsigned char *md, SHA512_CTX *c)
98         {
99         unsigned char *p=(unsigned char *)c->u.p;
100         size_t n=c->num;
101
102         p[n]=0x80;      /* There always is a room for one */
103         n++;
104         if (n > (sizeof(c->u)-16))
105                 memset (p+n,0,sizeof(c->u)-n), n=0,
106                 sha512_block_data_order (c,p,1);
107
108         memset (p+n,0,sizeof(c->u)-16-n);
109 #ifdef  B_ENDIAN
110         c->u.d[SHA_LBLOCK-2] = c->Nh;
111         c->u.d[SHA_LBLOCK-1] = c->Nl;
112 #else
113         p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
114         p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
115         p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
116         p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
117         p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
118         p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
119         p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
120         p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
121         p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
122         p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
123         p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
124         p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
125         p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
126         p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
127         p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
128         p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
129 #endif
130
131         sha512_block_data_order (c,p,1);
132
133         if (md==0) return 0;
134
135         switch (c->md_len)
136                 {
137                 /* Let compiler decide if it's appropriate to unroll... */
138                 case SHA384_DIGEST_LENGTH:
139                         for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
140                                 {
141                                 SHA_LONG64 t = c->h[n];
142
143                                 *(md++) = (unsigned char)(t>>56);
144                                 *(md++) = (unsigned char)(t>>48);
145                                 *(md++) = (unsigned char)(t>>40);
146                                 *(md++) = (unsigned char)(t>>32);
147                                 *(md++) = (unsigned char)(t>>24);
148                                 *(md++) = (unsigned char)(t>>16);
149                                 *(md++) = (unsigned char)(t>>8);
150                                 *(md++) = (unsigned char)(t);
151                                 }
152                         break;
153                 case SHA512_DIGEST_LENGTH:
154                         for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
155                                 {
156                                 SHA_LONG64 t = c->h[n];
157
158                                 *(md++) = (unsigned char)(t>>56);
159                                 *(md++) = (unsigned char)(t>>48);
160                                 *(md++) = (unsigned char)(t>>40);
161                                 *(md++) = (unsigned char)(t>>32);
162                                 *(md++) = (unsigned char)(t>>24);
163                                 *(md++) = (unsigned char)(t>>16);
164                                 *(md++) = (unsigned char)(t>>8);
165                                 *(md++) = (unsigned char)(t);
166                                 }
167                         break;
168                 /* ... as well as make sure md_len is not abused. */
169                 default:        return 0;
170                 }
171
172         return 1;
173         }
174
175 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
176 {   return SHA512_Final (md,c);   }
177
178 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
179         {
180         SHA_LONG64      l;
181         unsigned char  *p=c->u.p;
182         const unsigned char *data=(const unsigned char *)_data;
183
184         if (len==0) return  1;
185
186         l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
187         if (l < c->Nl)          c->Nh++;
188         if (sizeof(len)>=8)     c->Nh+=(((SHA_LONG64)len)>>61);
189         c->Nl=l;
190
191         if (c->num != 0)
192                 {
193                 size_t n = sizeof(c->u) - c->num;
194
195                 if (len < n)
196                         {
197                         memcpy (p+c->num,data,len), c->num += len;
198                         return 1;
199                         }
200                 else    {
201                         memcpy (p+c->num,data,n), c->num = 0;
202                         len-=n, data+=n;
203                         sha512_block_data_order (c,p,1);
204                         }
205                 }
206
207         if (len >= sizeof(c->u))
208                 {
209 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
210                 if ((size_t)data%sizeof(c->u.d[0]) != 0)
211                         while (len >= sizeof(c->u))
212                                 memcpy (p,data,sizeof(c->u)),
213                                 sha512_block_data_order (c,p,1),
214                                 len  -= sizeof(c->u),
215                                 data += sizeof(c->u);
216                 else
217 #endif
218                         sha512_block_data_order (c,data,len/sizeof(c->u)),
219                         data += len,
220                         len  %= sizeof(c->u),
221                         data -= len;
222                 }
223
224         if (len != 0)   memcpy (p,data,len), c->num = (int)len;
225
226         return 1;
227         }
228
229 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
230 {   return SHA512_Update (c,data,len);   }
231
232 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
233 {   sha512_block_data_order (c,data,1);  }
234
235 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
236         {
237         SHA512_CTX c;
238         static unsigned char m[SHA384_DIGEST_LENGTH];
239
240         if (md == NULL) md=m;
241         SHA384_Init(&c);
242         SHA512_Update(&c,d,n);
243         SHA512_Final(md,&c);
244         OPENSSL_cleanse(&c,sizeof(c));
245         return(md);
246         }
247
248 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
249         {
250         SHA512_CTX c;
251         static unsigned char m[SHA512_DIGEST_LENGTH];
252
253         if (md == NULL) md=m;
254         SHA512_Init(&c);
255         SHA512_Update(&c,d,n);
256         SHA512_Final(md,&c);
257         OPENSSL_cleanse(&c,sizeof(c));
258         return(md);
259         }
260
/* Everything from here to the matching #endif is the C implementation
 * of the block transform; it is compiled out when SHA512_ASM is defined. */
#ifndef SHA512_ASM
/*
 * Per-round additive constants: entry i is added into T1 in round i of
 * every sha512_block_data_order variant below (80 rounds per block).
 * NOTE(review): presumably the K constants from the Secure Hash Standard
 * (FIPS 180) -- verify against the standard before editing any value.
 */
static const SHA_LONG64 K512[80] = {
        U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
        U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
        U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
        U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
        U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
        U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
        U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
        U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
        U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
        U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
        U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
        U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
        U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
        U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
        U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
        U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
        U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
        U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
        U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
        U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
        U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
        U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
        U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
        U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
        U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
        U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
        U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
        U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
        U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
        U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
        U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
        U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
        U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
        U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
        U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
        U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
        U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
        U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
        U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
        U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
303
/*
 * Optional compiler/CPU specific versions of two primitives:
 *
 *   ROTR(a,n)  - rotate the 64-bit value a right by n bits;
 *   PULL64(x)  - read the 8 bytes stored at x as a big-endian 64-bit
 *                value on a little-endian host.
 *
 * Whatever is left undefined here is supplied by the portable fallbacks
 * that follow this section. Defining PEDANTIC disables all of it.
 *
 * The GCC ROTR variants keep their result in a SHA_LONG64 rather than an
 * "unsigned long": the latter is only 32 bits wide on LLP64 targets
 * (e.g. 64-bit Windows built with GCC), where it would not match the
 * 64-bit rotate instruction and would truncate the result.
 */
#ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#  if defined(__x86_64) || defined(__x86_64__)
/* n is handed to the instruction as an immediate, so it must be constant. */
#   define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                asm ("rorq %1,%0"       \
                                : "=r"(ret)             \
                                : "J"(n),"0"(a)         \
                                : "cc"); ret;           })
#   if !defined(B_ENDIAN)
/* Load the word as stored, then reverse its bytes with bswapq. */
#    define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
                                asm ("bswapq    %0"             \
                                : "=r"(ret)                     \
                                : "0"(ret)); ret;               })
#   endif
#  elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#   if defined(I386_ONLY)
/* Byte-reverse each 32-bit half with xchgb/roll only (no bswap used). */
#    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                        unsigned int hi,lo;                     \
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                    "roll $16,%%eax; roll $16,%%edx; "\
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
                                : "=a"(lo),"=d"(hi)             \
                                : "0"(p[1]),"1"(p[0]) : "cc");  \
                                ((SHA_LONG64)hi)<<32|lo;        })
#   else
/* Byte-reverse each 32-bit half with bswapl and swap the halves. */
#    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                        unsigned int hi,lo;                     \
                                asm ("bswapl %0; bswapl %1;"    \
                                : "=r"(lo),"=r"(hi)             \
                                : "0"(p[1]),"1"(p[0]));         \
                                ((SHA_LONG64)hi)<<32|lo;        })
#   endif
#  elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
#   define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                asm ("rotrdi %0,%1,%2"  \
                                : "=r"(ret)             \
                                : "r"(a),"K"(n)); ret;  })
#  endif
# elif defined(_MSC_VER)
#  if defined(_WIN64)   /* applies to both IA-64 and AMD64 */
#   define ROTR(a,n)    _rotr64((a),n)
#  endif
#  if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
/*
 * __pull64be reads its argument through ECX and leaves the byte-reversed
 * halves in EDX (from offset 0) and EAX (from offset 4); there is no C
 * return statement (presumably EDX:EAX is the 64-bit return value under
 * this calling convention -- confirm against the compiler documentation).
 */
#   if defined(I386_ONLY)
    static SHA_LONG64 __fastcall __pull64be(const void *x)
    {   _asm    mov     edx, [ecx + 0]
        _asm    mov     eax, [ecx + 4]
        _asm    xchg    dh,dl
        _asm    xchg    ah,al
        _asm    rol     edx,16
        _asm    rol     eax,16
        _asm    xchg    dh,dl
        _asm    xchg    ah,al
    }
#   else
    static SHA_LONG64 __fastcall __pull64be(const void *x)
    {   _asm    mov     edx, [ecx + 0]
        _asm    mov     eax, [ecx + 4]
        _asm    bswap   edx
        _asm    bswap   eax
    }
#   endif
#   define PULL64(x) __pull64be(&(x))
#   if _MSC_VER<=1200
#    pragma inline_depth(0)
#   endif
#  endif
# endif
#endif
373
/*
 * Portable fallbacks, used when no compiler-specific version was
 * selected earlier in the file.
 *
 * B(x,j)    - byte j of the object x, shifted to its big-endian position
 *             within a 64-bit word (byte 0 is the most significant);
 * PULL64(x) - the 8 bytes of x assembled as a big-endian 64-bit value.
 *
 * Every macro argument is parenthesised so that expression arguments
 * (e.g. ROTR(v,n-1)) expand with the intended precedence.
 */
#ifndef PULL64
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#endif

/* ROTR(x,s): rotate the 64-bit value x right by s bits, 0 < s < 64. */
#ifndef ROTR
#define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
#endif

/* Mixing functions applied to the working variables in every round ... */
#define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
/* ... and to earlier schedule words when extending the schedule. */
#define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
#define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

/* Ch: each bit of x selects the matching bit of y (1) or of z (0). */
#define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
/* Maj: each result bit is the majority of the three input bits. */
#define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
390
391
/* 32-bit x86 builds get their own C variant of the block transform. */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)

/*
 * GO_FOR_SSE2(ctx,in,num): run-time dispatch used at the top of the
 * block transform. If bit 26 of OPENSSL_ia32cap_P is set (presumably the
 * SSE2 capability flag -- confirm against the capability-vector
 * documentation), the whole request is handed to sha512_block_sse2 and
 * the *enclosing* function returns; otherwise the macro does nothing.
 */
#if defined(OPENSSL_IA32_SSE2) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY)
#define GO_FOR_SSE2(ctx,in,num)         do {            \
        void    sha512_block_sse2(void *,const void *,size_t);  \
        if (!(OPENSSL_ia32cap_P & (1<<26))) break;      \
        sha512_block_sse2(ctx->h,in,num); return;       \
                                        } while (0)
#endif
401
/*
 * Block transform, 32-bit x86 variant.
 *
 * Processes |num| consecutive blocks of SHA_LBLOCK 64-bit words starting
 * at |in| and accumulates the result into ctx->h.
 *
 * Instead of shuffling eight working variables every round, the eight
 * values live in a window F[0..7] inside X[] and the window pointer F is
 * moved down by one element per round. Writing the new first value to
 * F[-1] and then decrementing F renames every variable at once. The
 * message schedule shares the same array: the word used in a round is
 * stored at F[8], so 16 rounds later that word is found at F[8+16].
 */
static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
        {
        const SHA_LONG64 *W=in;
        SHA_LONG64      T1;
        SHA_LONG64      X[9+80],*F;     /* 80 rounds of sliding + 9 live slots */
        int i;

#ifdef GO_FOR_SSE2
        /* May process everything and return from this function. */
        GO_FOR_SSE2(ctx,in,num);
#endif

                        while (num--) {

        /* Start with the window at the top of X[], holding the current state. */
        F = X+80;
        F[0] = ctx->h[0];       F[1] = ctx->h[1];
        F[2] = ctx->h[2];       F[3] = ctx->h[3];
        F[4] = ctx->h[4];       F[5] = ctx->h[5];
        F[6] = ctx->h[6];       F[7] = ctx->h[7];

        /* Rounds 0..15: schedule words come straight from the input block. */
        for (i=0;i<16;i++,F--)
                {
#ifdef B_ENDIAN
                T1 = W[i];
#else
                T1 = PULL64(W[i]);
#endif
                F[8]  = T1;     /* remember the schedule word for later rounds */
                T1   += F[7] + Sigma1(F[4]) + Ch(F[4],F[5],F[6]) + K512[i];
                F[3] += T1;     /* becomes F[4] once the window has moved */
                T1   += Sigma0(F[0]) + Maj(F[0],F[1],F[2]);
                F[-1] = T1;     /* becomes F[0] once the window has moved */
                }

        /* Rounds 16..79: schedule words are derived from earlier ones. */
        for (;i<80;i++,F--)
                {
                T1    = sigma0(F[8+16-1]);      /* word from 15 rounds ago */
                T1   += sigma1(F[8+16-14]);     /* word from 2 rounds ago */

                T1   += F[8+16] + F[8+16-9];    /* words from 16 and 7 rounds ago */
                F[8]  = T1;
                T1   += F[7] + Sigma1(F[4]) + Ch(F[4],F[5],F[6]) + K512[i];
                F[3] += T1;
                T1   += Sigma0(F[0]) + Maj(F[0],F[1],F[2]);
                F[-1] = T1;
                }

        /* Fold the final working values back into the chaining state. */
        ctx->h[0] += F[0];      ctx->h[1] += F[1];
        ctx->h[2] += F[2];      ctx->h[3] += F[3];
        ctx->h[4] += F[4];      ctx->h[5] += F[5];
        ctx->h[6] += F[6];      ctx->h[7] += F[7];

                        W+=SHA_LBLOCK;
                        }
        }
456
457 #elif defined(OPENSSL_SMALL_FOOTPRINT)
458
459 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
460         {
461         const SHA_LONG64 *W=in;
462         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1,T2;
463         SHA_LONG64      X[16];
464         int i;
465
466                         while (num--) {
467
468         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
469         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
470
471         for (i=0;i<16;i++)
472                 {
473 #ifdef B_ENDIAN
474                 T1 = X[i] = W[i];
475 #else
476                 T1 = X[i] = PULL64(W[i]);
477 #endif
478                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
479                 T2 = Sigma0(a) + Maj(a,b,c);
480                 h = g;  g = f;  f = e;  e = d + T1;
481                 d = c;  c = b;  b = a;  a = T1 + T2;
482                 }
483
484         for (;i<80;i++)
485                 {
486                 s0 = X[(i+1)&0x0f];     s0 = sigma0(s0);
487                 s1 = X[(i+14)&0x0f];    s1 = sigma1(s1);
488
489                 T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
490                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
491                 T2 = Sigma0(a) + Maj(a,b,c);
492                 h = g;  g = f;  f = e;  e = d + T1;
493                 d = c;  c = b;  b = a;  a = T1 + T2;
494                 }
495
496         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
497         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
498
499                         W+=SHA_LBLOCK;
500                         }
501         }
502
503 #else
504
/*
 * ROUND_00_15(i,a,...,h): one round using K512[i]. On entry T1 must hold
 * the schedule word for the round. Only d and h are written: d receives
 * d+T1 and h receives the new leading value. The caller rotates the
 * argument list by one position per round instead of moving the values
 * between variables.
 */
#define ROUND_00_15(i,a,b,c,d,e,f,g,h)          do {    \
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
        h = Sigma0(a) + Maj(a,b,c);                     \
        d += T1;        h += T1;                } while (0)

/*
 * ROUND_16_80(i,j,a,...,h,X): update slot j of the 16-entry schedule
 * ring X[] from slots j+1, j+9 and j+14 (all modulo 16), leave the new
 * word in T1 and then perform round i+j via ROUND_00_15. Uses the
 * caller's s0, s1 and T1 as scratch variables.
 */
#define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)      do {    \
        s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
        s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
        ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
515
/*
 * Block transform, default (unrolled) variant.
 *
 * Processes |num| consecutive blocks of SHA_LBLOCK 64-bit words starting
 * at |in| and accumulates the result into ctx->h. Rounds are unrolled in
 * groups of 16; within a group the working variables are never copied,
 * the macro argument list is rotated by one position per round instead.
 * After 16 rounds the rotation is back at its starting order.
 */
static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
        {
        const SHA_LONG64 *W=in;
        SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1;
        SHA_LONG64      X[16];  /* ring buffer of the last 16 schedule words */
        int i;

                        while (num--) {

        a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
        e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];

        /* Rounds 0..15: schedule words come straight from the input block
         * (byte-swapped through PULL64 unless B_ENDIAN is defined). */
#ifdef B_ENDIAN
        T1 = X[0] = W[0];       ROUND_00_15(0,a,b,c,d,e,f,g,h);
        T1 = X[1] = W[1];       ROUND_00_15(1,h,a,b,c,d,e,f,g);
        T1 = X[2] = W[2];       ROUND_00_15(2,g,h,a,b,c,d,e,f);
        T1 = X[3] = W[3];       ROUND_00_15(3,f,g,h,a,b,c,d,e);
        T1 = X[4] = W[4];       ROUND_00_15(4,e,f,g,h,a,b,c,d);
        T1 = X[5] = W[5];       ROUND_00_15(5,d,e,f,g,h,a,b,c);
        T1 = X[6] = W[6];       ROUND_00_15(6,c,d,e,f,g,h,a,b);
        T1 = X[7] = W[7];       ROUND_00_15(7,b,c,d,e,f,g,h,a);
        T1 = X[8] = W[8];       ROUND_00_15(8,a,b,c,d,e,f,g,h);
        T1 = X[9] = W[9];       ROUND_00_15(9,h,a,b,c,d,e,f,g);
        T1 = X[10] = W[10];     ROUND_00_15(10,g,h,a,b,c,d,e,f);
        T1 = X[11] = W[11];     ROUND_00_15(11,f,g,h,a,b,c,d,e);
        T1 = X[12] = W[12];     ROUND_00_15(12,e,f,g,h,a,b,c,d);
        T1 = X[13] = W[13];     ROUND_00_15(13,d,e,f,g,h,a,b,c);
        T1 = X[14] = W[14];     ROUND_00_15(14,c,d,e,f,g,h,a,b);
        T1 = X[15] = W[15];     ROUND_00_15(15,b,c,d,e,f,g,h,a);
#else
        T1 = X[0]  = PULL64(W[0]);      ROUND_00_15(0,a,b,c,d,e,f,g,h);
        T1 = X[1]  = PULL64(W[1]);      ROUND_00_15(1,h,a,b,c,d,e,f,g);
        T1 = X[2]  = PULL64(W[2]);      ROUND_00_15(2,g,h,a,b,c,d,e,f);
        T1 = X[3]  = PULL64(W[3]);      ROUND_00_15(3,f,g,h,a,b,c,d,e);
        T1 = X[4]  = PULL64(W[4]);      ROUND_00_15(4,e,f,g,h,a,b,c,d);
        T1 = X[5]  = PULL64(W[5]);      ROUND_00_15(5,d,e,f,g,h,a,b,c);
        T1 = X[6]  = PULL64(W[6]);      ROUND_00_15(6,c,d,e,f,g,h,a,b);
        T1 = X[7]  = PULL64(W[7]);      ROUND_00_15(7,b,c,d,e,f,g,h,a);
        T1 = X[8]  = PULL64(W[8]);      ROUND_00_15(8,a,b,c,d,e,f,g,h);
        T1 = X[9]  = PULL64(W[9]);      ROUND_00_15(9,h,a,b,c,d,e,f,g);
        T1 = X[10] = PULL64(W[10]);     ROUND_00_15(10,g,h,a,b,c,d,e,f);
        T1 = X[11] = PULL64(W[11]);     ROUND_00_15(11,f,g,h,a,b,c,d,e);
        T1 = X[12] = PULL64(W[12]);     ROUND_00_15(12,e,f,g,h,a,b,c,d);
        T1 = X[13] = PULL64(W[13]);     ROUND_00_15(13,d,e,f,g,h,a,b,c);
        T1 = X[14] = PULL64(W[14]);     ROUND_00_15(14,c,d,e,f,g,h,a,b);
        T1 = X[15] = PULL64(W[15]);     ROUND_00_15(15,b,c,d,e,f,g,h,a);
#endif

        /* Rounds 16..79, 16 per iteration; the round number is i+j. */
        for (i=16;i<80;i+=16)
                {
                ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X);
                ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X);
                ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X);
                ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X);
                ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X);
                ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X);
                ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X);
                ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X);
                ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X);
                ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X);
                ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X);
                ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X);
                ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X);
                ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X);
                ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X);
                ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X);
                }

        /* Fold the final working values back into the chaining state. */
        ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
        ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;

                        W+=SHA_LBLOCK;
                        }
        }
590
591 #endif
592
593 #endif /* SHA512_ASM */
594
595 #endif /* OPENSSL_NO_SHA512 */