Allow for dynamic base in Win64 FIPS module.
[openssl.git] / crypto / sha / sha512.c
1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3  * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
4  * according to the OpenSSL license [found in ../../LICENSE].
5  * ====================================================================
6  */
7 #include <openssl/opensslconf.h>
8 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
9 /*
10  * IMPLEMENTATION NOTES.
11  *
12  * As you might have noticed 32-bit hash algorithms:
13  *
14  * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
15  * - optimized versions implement two transform functions: one operating
16  *   on [aligned] data in host byte order and one - on data in input
17  *   stream byte order;
18  * - share common byte-order neutral collector and padding function
19  *   implementations, ../md32_common.h;
20  *
21  * None of the above applies to this SHA-512 implementation. Reasons
22  * [in reverse order] are:
23  *
24  * - it's the only 64-bit hash algorithm at the time of this writing,
25  *   there is no need for common collector/padding implementation [yet];
26  * - by supporting only one transform function [which operates on
27  *   *aligned* data in input stream byte order, big-endian in this case]
28  *   we minimize burden of maintenance in two ways: a) collector/padding
29  *   function is simpler; b) only one transform function to stare at;
30  * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
31  *   apply a number of optimizations to mitigate potential performance
32  *   penalties caused by previous design decision;
33  *
34  * Caveat lector.
35  *
36  * Implementation relies on the fact that "long long" is 64-bit on
37  * both 32- and 64-bit platforms. If some compiler vendor comes up
38  * with 128-bit long long, adjustment to sha.h would be required.
39  * As this implementation relies on 64-bit integer type, it's totally
40  * inappropriate for platforms which don't support it, most notably
41  * 16-bit platforms.
42  *                                      <appro@fy.chalmers.se>
43  */
44 #include <stdlib.h>
45 #include <string.h>
46
47 #include <openssl/crypto.h>
48 #include <openssl/sha.h>
49 #include <openssl/opensslv.h>
50
51 #include "cryptlib.h"
52
53 __fips_constseg
54 const char SHA512_version[]="SHA-512" OPENSSL_VERSION_PTEXT;
55
/*
 * On x86, x86-64 and s390/s390x targets, and whenever an assembler
 * transform is supplied (SHA512_ASM), the block function is treated as
 * able to read input at any alignment; SHA512_Update then skips its
 * bounce-copy through the context buffer.
 */
#if defined(SHA512_ASM) || \
    defined(__s390x__) || defined(__s390__) || \
    defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64) || \
    defined(_M_IX86) || defined(__i386__) || defined(__i386)
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif
62
63 int SHA384_Init (SHA512_CTX *c)
64         {
65         c->h[0]=U64(0xcbbb9d5dc1059ed8);
66         c->h[1]=U64(0x629a292a367cd507);
67         c->h[2]=U64(0x9159015a3070dd17);
68         c->h[3]=U64(0x152fecd8f70e5939);
69         c->h[4]=U64(0x67332667ffc00b31);
70         c->h[5]=U64(0x8eb44a8768581511);
71         c->h[6]=U64(0xdb0c2e0d64f98fa7);
72         c->h[7]=U64(0x47b5481dbefa4fa4);
73
74         c->Nl=0;        c->Nh=0;
75         c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
76         return 1;
77         }
78
79 int SHA512_Init (SHA512_CTX *c)
80         {
81         c->h[0]=U64(0x6a09e667f3bcc908);
82         c->h[1]=U64(0xbb67ae8584caa73b);
83         c->h[2]=U64(0x3c6ef372fe94f82b);
84         c->h[3]=U64(0xa54ff53a5f1d36f1);
85         c->h[4]=U64(0x510e527fade682d1);
86         c->h[5]=U64(0x9b05688c2b3e6c1f);
87         c->h[6]=U64(0x1f83d9abfb41bd6b);
88         c->h[7]=U64(0x5be0cd19137e2179);
89
90         c->Nl=0;        c->Nh=0;
91         c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
92         return 1;
93         }
94
95 #ifndef SHA512_ASM
96 static
97 #endif
98 void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
99
100 int SHA512_Final (unsigned char *md, SHA512_CTX *c)
101         {
102         unsigned char *p=(unsigned char *)c->u.p;
103         size_t n=c->num;
104
105         p[n]=0x80;      /* There always is a room for one */
106         n++;
107         if (n > (sizeof(c->u)-16))
108                 memset (p+n,0,sizeof(c->u)-n), n=0,
109                 sha512_block_data_order (c,p,1);
110
111         memset (p+n,0,sizeof(c->u)-16-n);
112 #ifdef  B_ENDIAN
113         c->u.d[SHA_LBLOCK-2] = c->Nh;
114         c->u.d[SHA_LBLOCK-1] = c->Nl;
115 #else
116         p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
117         p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
118         p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
119         p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
120         p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
121         p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
122         p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
123         p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
124         p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
125         p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
126         p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
127         p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
128         p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
129         p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
130         p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
131         p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
132 #endif
133
134         sha512_block_data_order (c,p,1);
135
136         if (md==0) return 0;
137
138         switch (c->md_len)
139                 {
140                 /* Let compiler decide if it's appropriate to unroll... */
141                 case SHA384_DIGEST_LENGTH:
142                         for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
143                                 {
144                                 SHA_LONG64 t = c->h[n];
145
146                                 *(md++) = (unsigned char)(t>>56);
147                                 *(md++) = (unsigned char)(t>>48);
148                                 *(md++) = (unsigned char)(t>>40);
149                                 *(md++) = (unsigned char)(t>>32);
150                                 *(md++) = (unsigned char)(t>>24);
151                                 *(md++) = (unsigned char)(t>>16);
152                                 *(md++) = (unsigned char)(t>>8);
153                                 *(md++) = (unsigned char)(t);
154                                 }
155                         break;
156                 case SHA512_DIGEST_LENGTH:
157                         for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
158                                 {
159                                 SHA_LONG64 t = c->h[n];
160
161                                 *(md++) = (unsigned char)(t>>56);
162                                 *(md++) = (unsigned char)(t>>48);
163                                 *(md++) = (unsigned char)(t>>40);
164                                 *(md++) = (unsigned char)(t>>32);
165                                 *(md++) = (unsigned char)(t>>24);
166                                 *(md++) = (unsigned char)(t>>16);
167                                 *(md++) = (unsigned char)(t>>8);
168                                 *(md++) = (unsigned char)(t);
169                                 }
170                         break;
171                 /* ... as well as make sure md_len is not abused. */
172                 default:        return 0;
173                 }
174
175         return 1;
176         }
177
178 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
179 {   return SHA512_Final (md,c);   }
180
181 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
182         {
183         SHA_LONG64      l;
184         unsigned char  *p=c->u.p;
185         const unsigned char *data=(const unsigned char *)_data;
186
187         if (len==0) return  1;
188
189         l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
190         if (l < c->Nl)          c->Nh++;
191         if (sizeof(len)>=8)     c->Nh+=(((SHA_LONG64)len)>>61);
192         c->Nl=l;
193
194         if (c->num != 0)
195                 {
196                 size_t n = sizeof(c->u) - c->num;
197
198                 if (len < n)
199                         {
200                         memcpy (p+c->num,data,len), c->num += (unsigned int)len;
201                         return 1;
202                         }
203                 else    {
204                         memcpy (p+c->num,data,n), c->num = 0;
205                         len-=n, data+=n;
206                         sha512_block_data_order (c,p,1);
207                         }
208                 }
209
210         if (len >= sizeof(c->u))
211                 {
212 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
213                 if ((size_t)data%sizeof(c->u.d[0]) != 0)
214                         while (len >= sizeof(c->u))
215                                 memcpy (p,data,sizeof(c->u)),
216                                 sha512_block_data_order (c,p,1),
217                                 len  -= sizeof(c->u),
218                                 data += sizeof(c->u);
219                 else
220 #endif
221                         sha512_block_data_order (c,data,len/sizeof(c->u)),
222                         data += len,
223                         len  %= sizeof(c->u),
224                         data -= len;
225                 }
226
227         if (len != 0)   memcpy (p,data,len), c->num = (int)len;
228
229         return 1;
230         }
231
232 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
233 {   return SHA512_Update (c,data,len);   }
234
235 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
236 {   sha512_block_data_order (c,data,1);  }
237
238 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
239         {
240         SHA512_CTX c;
241         static unsigned char m[SHA384_DIGEST_LENGTH];
242
243         if (md == NULL) md=m;
244         SHA384_Init(&c);
245         SHA512_Update(&c,d,n);
246         SHA512_Final(md,&c);
247         OPENSSL_cleanse(&c,sizeof(c));
248         return(md);
249         }
250
251 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
252         {
253         SHA512_CTX c;
254         static unsigned char m[SHA512_DIGEST_LENGTH];
255
256         if (md == NULL) md=m;
257         SHA512_Init(&c);
258         SHA512_Update(&c,d,n);
259         SHA512_Final(md,&c);
260         OPENSSL_cleanse(&c,sizeof(c));
261         return(md);
262         }
263
264 #ifndef SHA512_ASM
265 __fips_constseg
266 static const SHA_LONG64 K512[80] = {
267         U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
268         U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
269         U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
270         U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
271         U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
272         U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
273         U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
274         U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
275         U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
276         U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
277         U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
278         U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
279         U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
280         U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
281         U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
282         U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
283         U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
284         U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
285         U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
286         U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
287         U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
288         U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
289         U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
290         U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
291         U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
292         U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
293         U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
294         U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
295         U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
296         U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
297         U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
298         U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
299         U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
300         U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
301         U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
302         U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
303         U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
304         U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
305         U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
306         U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
307
/*
 * Optional compiler/CPU specific definitions of two primitives:
 *
 *   ROTR(a,n)  - rotate the 64-bit value a right by n bits;
 *   PULL64(x)  - fetch the 64-bit big-endian value stored at &x.
 *
 * The whole section is skipped when PEDANTIC is defined.  Whatever is
 * still undefined afterwards is supplied by the portable C definitions
 * that follow this section.
 */
308 #ifndef PEDANTIC
/* GCC-style inline assembler, unless inline asm has been disabled. */
309 # if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
310 #  if defined(__x86_64) || defined(__x86_64__)
/* x86-64: a single rorq; the "J" constraint takes the count as a constant. */
311 #   define ROTR(a,n)    ({ SHA_LONG64 ret;              \
312                                 asm ("rorq %1,%0"       \
313                                 : "=r"(ret)             \
314                                 : "J"(n),"0"(a)         \
315                                 : "cc"); ret;           })
316 #   if !defined(B_ENDIAN)
/* x86-64: direct 64-bit load through a SHA_LONG64 pointer, then bswapq. */
317 #    define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
318                                 asm ("bswapq    %0"             \
319                                 : "=r"(ret)                     \
320                                 : "0"(ret)); ret;               })
321 #   endif
322 #  elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
323 #   if defined(I386_ONLY)
/*
 * 32-bit x86 built with I386_ONLY: each 32-bit half is byte-reversed with
 * xchgb/roll instead of bswap (presumably because plain 80386 lacks
 * bswap -- confirm).  The word at p[0] becomes the high half of the result.
 */
324 #    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
325                          unsigned int hi=p[0],lo=p[1];          \
326                                 asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
327                                     "roll $16,%%eax; roll $16,%%edx; "\
328                                     "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
329                                 : "=a"(lo),"=d"(hi)             \
330                                 : "0"(lo),"1"(hi) : "cc");      \
331                                 ((SHA_LONG64)hi)<<32|lo;        })
332 #   else
/* 32-bit x86: byte-reverse both halves with bswapl; p[0] is the high half. */
333 #    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
334                          unsigned int hi=p[0],lo=p[1];          \
335                                 asm ("bswapl %0; bswapl %1;"    \
336                                 : "=r"(lo),"=r"(hi)             \
337                                 : "0"(lo),"1"(hi));             \
338                                 ((SHA_LONG64)hi)<<32|lo;        })
339 #   endif
340 #  elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
/* 64-bit PowerPC: rotate via rotrdi with the count passed as a constant. */
341 #   define ROTR(a,n)    ({ SHA_LONG64 ret;              \
342                                 asm ("rotrdi %0,%1,%2"  \
343                                 : "=r"(ret)             \
344                                 : "r"(a),"K"(n)); ret;  })
345 #  endif
346 # elif defined(_MSC_VER)
347 #  if defined(_WIN64)   /* applies to both IA-64 and AMD64 */
/* 64-bit MSVC: use the _rotr64 intrinsic. */
348 #   pragma intrinsic(_rotr64)
349 #   define ROTR(a,n)    _rotr64((a),n)
350 #  endif
351 #  if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
/*
 * 32-bit MSVC: __pull64be reads its argument through ecx and leaves the
 * byte-swapped halves in edx (from [ecx+0]) and eax (from [ecx+4]).  There
 * is no return statement; the code appears to rely on __fastcall passing x
 * in ecx and on edx:eax being the 64-bit return value -- confirm against
 * the compiler's calling-convention documentation before changing it.
 */
352 #   if defined(I386_ONLY)
/* I386_ONLY flavour: xchg/rol sequence in place of bswap. */
353     static SHA_LONG64 __fastcall __pull64be(const void *x)
354     {   _asm    mov     edx, [ecx + 0]
355         _asm    mov     eax, [ecx + 4]
356         _asm    xchg    dh,dl
357         _asm    xchg    ah,al
358         _asm    rol     edx,16
359         _asm    rol     eax,16
360         _asm    xchg    dh,dl
361         _asm    xchg    ah,al
362     }
363 #   else
/* Default flavour: one bswap per half. */
364     static SHA_LONG64 __fastcall __pull64be(const void *x)
365     {   _asm    mov     edx, [ecx + 0]
366         _asm    mov     eax, [ecx + 4]
367         _asm    bswap   edx
368         _asm    bswap   eax
369     }
370 #   endif
371 #   define PULL64(x) __pull64be(&(x))
/* NOTE(review): inline_depth(0) is applied only for _MSC_VER<=1200; the reason is not stated here. */
372 #   if _MSC_VER<=1200
373 #    pragma inline_depth(0)
374 #   endif
375 #  endif
376 # endif
377 #endif
378
/*
 * Portable fallbacks, used when no compiler-specific version was selected.
 *
 * B(x,j)    - byte j (0 = first in memory) of the object x, widened to
 *             64 bits and shifted to its big-endian position.
 * PULL64(x) - the eight bytes starting at &x assembled as a big-endian
 *             64-bit value; it reads byte by byte, so any alignment works.
 * ROTR(x,s) - rotate the 64-bit value x right by s bits, 0 < s < 64.
 *
 * Every macro argument and each complete expansion is parenthesized so
 * that compound arguments (for instance a rotate count written as an
 * expression such as 2-1) expand with the intended precedence.
 */
#ifndef PULL64
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#endif

#ifndef ROTR
#define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
#endif
387
/*
 * Round helper functions, all on 64-bit words.
 *
 * Sigma0/Sigma1 mix three rotations of the working variables a and e;
 * sigma0/sigma1 mix two rotations and one plain right shift and are used
 * when extending the message schedule.
 */
#define Sigma0(x)       (ROTR((x),39) ^ ROTR((x),34) ^ ROTR((x),28))
#define Sigma1(x)       (ROTR((x),41) ^ ROTR((x),18) ^ ROTR((x),14))
#define sigma0(x)       (((x)>>7) ^ ROTR((x),8)  ^ ROTR((x),1))
#define sigma1(x)       (((x)>>6) ^ ROTR((x),61) ^ ROTR((x),19))

/* Ch: per bit, take y where x is set and z where x is clear. */
#define Ch(x,y,z)       ((z) ^ ((x) & ((y) ^ (z))))
/* Maj: per bit, the value held by at least two of x, y, z. */
#define Maj(x,y,z)      (((x) & (y)) | ((z) & ((x) | (y))))

395
396
397 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
398 /*
399  * This code should give better results on 32-bit CPU with less than
400  * ~24 registers, both size and performance wise...
401  *
     * Block transform, 32-bit x86 flavour: folds num input blocks starting
     * at in into ctx->h.
     *
     * Only A, E and T are named variables.  The remaining working values
     * and the message schedule live in the array X, addressed through the
     * pointer F, which moves down by one element per round.  Each round
     * stores the current A at F[0], E at F[4] and the round's message word
     * at F[8]; after the decrement the values written by earlier rounds
     * are found at F[1..3] (previous A values), F[5..7] (previous E values)
     * and, from F[9] upwards, the earlier message words.
     */
402 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
403         {
404         const SHA_LONG64 *W=in;
405         SHA_LONG64      A,E,T;
            /* 80 rounds of downward movement plus the 9 slots F[0..8]. */
406         SHA_LONG64      X[9+80],*F;
407         int i;
408
409                         while (num--) {
410
            /* Start at the top of X and seed the window from ctx->h. */
411         F    = X+80;
412         A    = ctx->h[0];       F[1] = ctx->h[1];
413         F[2] = ctx->h[2];       F[3] = ctx->h[3];
414         E    = ctx->h[4];       F[5] = ctx->h[5];
415         F[6] = ctx->h[6];       F[7] = ctx->h[7];
416
            /* Rounds 0..15: the message word comes straight from the input. */
417         for (i=0;i<16;i++,F--)
418                 {
419 #ifdef B_ENDIAN
420                 T = W[i];
421 #else
422                 T = PULL64(W[i]);
423 #endif
424                 F[0] = A;
425                 F[4] = E;
426                 F[8] = T;
427                 T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
428                 E    = F[3] + T;
429                 A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
430                 }
431
            /*
             * Rounds 16..79: the message word is derived from words stored
             * by earlier rounds; F[8+k] is the word stored k rounds ago.
             */
432         for (;i<80;i++,F--)
433                 {
434                 T    = sigma0(F[8+16-1]);
435                 T   += sigma1(F[8+16-14]);
436                 T   += F[8+16] + F[8+16-9];
437
438                 F[0] = A;
439                 F[4] = E;
440                 F[8] = T;
441                 T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
442                 E    = F[3] + T;
443                 A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
444                 }
445
            /* F has moved down 80 times and equals X again; fold into ctx->h. */
446         ctx->h[0] += A;         ctx->h[1] += F[1];
447         ctx->h[2] += F[2];      ctx->h[3] += F[3];
448         ctx->h[4] += E;         ctx->h[5] += F[5];
449         ctx->h[6] += F[6];      ctx->h[7] += F[7];
450
                            /* Advance to the next input block. */
451                         W+=SHA_LBLOCK;
452                         }
453         }
454
455 #elif defined(OPENSSL_SMALL_FOOTPRINT)
456
457 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
458         {
459         const SHA_LONG64 *W=in;
460         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1,T2;
461         SHA_LONG64      X[16];
462         int i;
463
464                         while (num--) {
465
466         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
467         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
468
469         for (i=0;i<16;i++)
470                 {
471 #ifdef B_ENDIAN
472                 T1 = X[i] = W[i];
473 #else
474                 T1 = X[i] = PULL64(W[i]);
475 #endif
476                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
477                 T2 = Sigma0(a) + Maj(a,b,c);
478                 h = g;  g = f;  f = e;  e = d + T1;
479                 d = c;  c = b;  b = a;  a = T1 + T2;
480                 }
481
482         for (;i<80;i++)
483                 {
484                 s0 = X[(i+1)&0x0f];     s0 = sigma0(s0);
485                 s1 = X[(i+14)&0x0f];    s1 = sigma1(s1);
486
487                 T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
488                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
489                 T2 = Sigma0(a) + Maj(a,b,c);
490                 h = g;  g = f;  f = e;  e = d + T1;
491                 d = c;  c = b;  b = a;  a = T1 + T2;
492                 }
493
494         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
495         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
496
497                         W+=SHA_LBLOCK;
498                         }
499         }
500
501 #else
502
503 #define ROUND_00_15(i,a,b,c,d,e,f,g,h)          do {    \
504         T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
505         h = Sigma0(a) + Maj(a,b,c);                     \
506         d += T1;        h += T1;                } while (0)
507
508 #define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)      do {    \
509         s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
510         s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
511         T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
512         ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
513
514 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
515         {
516         const SHA_LONG64 *W=in;
517         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1;
518         SHA_LONG64      X[16];
519         int i;
520
521                         while (num--) {
522
523         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
524         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
525
526 #ifdef B_ENDIAN
527         T1 = X[0] = W[0];       ROUND_00_15(0,a,b,c,d,e,f,g,h);
528         T1 = X[1] = W[1];       ROUND_00_15(1,h,a,b,c,d,e,f,g);
529         T1 = X[2] = W[2];       ROUND_00_15(2,g,h,a,b,c,d,e,f);
530         T1 = X[3] = W[3];       ROUND_00_15(3,f,g,h,a,b,c,d,e);
531         T1 = X[4] = W[4];       ROUND_00_15(4,e,f,g,h,a,b,c,d);
532         T1 = X[5] = W[5];       ROUND_00_15(5,d,e,f,g,h,a,b,c);
533         T1 = X[6] = W[6];       ROUND_00_15(6,c,d,e,f,g,h,a,b);
534         T1 = X[7] = W[7];       ROUND_00_15(7,b,c,d,e,f,g,h,a);
535         T1 = X[8] = W[8];       ROUND_00_15(8,a,b,c,d,e,f,g,h);
536         T1 = X[9] = W[9];       ROUND_00_15(9,h,a,b,c,d,e,f,g);
537         T1 = X[10] = W[10];     ROUND_00_15(10,g,h,a,b,c,d,e,f);
538         T1 = X[11] = W[11];     ROUND_00_15(11,f,g,h,a,b,c,d,e);
539         T1 = X[12] = W[12];     ROUND_00_15(12,e,f,g,h,a,b,c,d);
540         T1 = X[13] = W[13];     ROUND_00_15(13,d,e,f,g,h,a,b,c);
541         T1 = X[14] = W[14];     ROUND_00_15(14,c,d,e,f,g,h,a,b);
542         T1 = X[15] = W[15];     ROUND_00_15(15,b,c,d,e,f,g,h,a);
543 #else
544         T1 = X[0]  = PULL64(W[0]);      ROUND_00_15(0,a,b,c,d,e,f,g,h);
545         T1 = X[1]  = PULL64(W[1]);      ROUND_00_15(1,h,a,b,c,d,e,f,g);
546         T1 = X[2]  = PULL64(W[2]);      ROUND_00_15(2,g,h,a,b,c,d,e,f);
547         T1 = X[3]  = PULL64(W[3]);      ROUND_00_15(3,f,g,h,a,b,c,d,e);
548         T1 = X[4]  = PULL64(W[4]);      ROUND_00_15(4,e,f,g,h,a,b,c,d);
549         T1 = X[5]  = PULL64(W[5]);      ROUND_00_15(5,d,e,f,g,h,a,b,c);
550         T1 = X[6]  = PULL64(W[6]);      ROUND_00_15(6,c,d,e,f,g,h,a,b);
551         T1 = X[7]  = PULL64(W[7]);      ROUND_00_15(7,b,c,d,e,f,g,h,a);
552         T1 = X[8]  = PULL64(W[8]);      ROUND_00_15(8,a,b,c,d,e,f,g,h);
553         T1 = X[9]  = PULL64(W[9]);      ROUND_00_15(9,h,a,b,c,d,e,f,g);
554         T1 = X[10] = PULL64(W[10]);     ROUND_00_15(10,g,h,a,b,c,d,e,f);
555         T1 = X[11] = PULL64(W[11]);     ROUND_00_15(11,f,g,h,a,b,c,d,e);
556         T1 = X[12] = PULL64(W[12]);     ROUND_00_15(12,e,f,g,h,a,b,c,d);
557         T1 = X[13] = PULL64(W[13]);     ROUND_00_15(13,d,e,f,g,h,a,b,c);
558         T1 = X[14] = PULL64(W[14]);     ROUND_00_15(14,c,d,e,f,g,h,a,b);
559         T1 = X[15] = PULL64(W[15]);     ROUND_00_15(15,b,c,d,e,f,g,h,a);
560 #endif
561
562         for (i=16;i<80;i+=16)
563                 {
564                 ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X);
565                 ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X);
566                 ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X);
567                 ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X);
568                 ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X);
569                 ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X);
570                 ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X);
571                 ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X);
572                 ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X);
573                 ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X);
574                 ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X);
575                 ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X);
576                 ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X);
577                 ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X);
578                 ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X);
579                 ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X);
580                 }
581
582         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
583         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
584
585                         W+=SHA_LBLOCK;
586                         }
587         }
588
589 #endif
590
591 #endif /* SHA512_ASM */
592
593 #else /* !OPENSSL_NO_SHA512 */
594
/*
 * With SHA-512 compiled out this file would otherwise define nothing.  For
 * the configurations listed, a single self-referencing static object is
 * emitted instead (presumably to avoid an empty translation unit).
 */
#if defined(OPENSSL_SYS_MACOSX) || defined(__DECC) || defined(PEDANTIC)
static void *dummy = &dummy;
#endif
598
599 #endif /* !OPENSSL_NO_SHA512 */