ARM assembler pack: profiler-assisted optimizations and NEON support.
[openssl.git] / crypto / sha / sha512.c
1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3  * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
4  * according to the OpenSSL license [found in ../../LICENSE].
5  * ====================================================================
6  */
7 #include <openssl/opensslconf.h>
8 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
9 /*
10  * IMPLEMENTATION NOTES.
11  *
12  * As you might have noticed 32-bit hash algorithms:
13  *
14  * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
15  * - optimized versions implement two transform functions: one operating
16  *   on [aligned] data in host byte order and one - on data in input
17  *   stream byte order;
18  * - share common byte-order neutral collector and padding function
19  *   implementations, ../md32_common.h;
20  *
21  * None of the above applies to this SHA-512 implementation. Reasons
22  * [in reverse order] are:
23  *
24  * - it's the only 64-bit hash algorithm at the time of this writing, so
25  *   there is no need for common collector/padding implementation [yet];
26  * - by supporting only one transform function [which operates on
27  *   *aligned* data in input stream byte order, big-endian in this case]
28  *   we minimize burden of maintenance in two ways: a) collector/padding
29  *   function is simpler; b) only one transform function to stare at;
30  * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
31  *   apply a number of optimizations to mitigate potential performance
32  *   penalties caused by previous design decision;
33  *
34  * Caveat lector.
35  *
36  * Implementation relies on the fact that "long long" is 64-bit on
37  * both 32- and 64-bit platforms. If some compiler vendor comes up
38  * with 128-bit long long, adjustment to sha.h would be required.
39  * As this implementation relies on 64-bit integer type, it's totally
40  * inappropriate for platforms which don't support it, most notably
41  * 16-bit platforms.
42  *                                      <appro@fy.chalmers.se>
43  */
44 #include <stdlib.h>
45 #include <string.h>
46
47 #include <openssl/crypto.h>
48 #include <openssl/sha.h>
49 #include <openssl/opensslv.h>
50
51 #include "cryptlib.h"
52
53 const char SHA512_version[]="SHA-512" OPENSSL_VERSION_PTEXT;
54
55 #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
56     defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
57     defined(__s390__) || defined(__s390x__) || \
58     defined(SHA512_ASM)
59 #define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
60 #endif
61
62 int SHA384_Init (SHA512_CTX *c)
63         {
64         c->h[0]=U64(0xcbbb9d5dc1059ed8);
65         c->h[1]=U64(0x629a292a367cd507);
66         c->h[2]=U64(0x9159015a3070dd17);
67         c->h[3]=U64(0x152fecd8f70e5939);
68         c->h[4]=U64(0x67332667ffc00b31);
69         c->h[5]=U64(0x8eb44a8768581511);
70         c->h[6]=U64(0xdb0c2e0d64f98fa7);
71         c->h[7]=U64(0x47b5481dbefa4fa4);
72
73         c->Nl=0;        c->Nh=0;
74         c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
75         return 1;
76         }
77
78 int SHA512_Init (SHA512_CTX *c)
79         {
80         c->h[0]=U64(0x6a09e667f3bcc908);
81         c->h[1]=U64(0xbb67ae8584caa73b);
82         c->h[2]=U64(0x3c6ef372fe94f82b);
83         c->h[3]=U64(0xa54ff53a5f1d36f1);
84         c->h[4]=U64(0x510e527fade682d1);
85         c->h[5]=U64(0x9b05688c2b3e6c1f);
86         c->h[6]=U64(0x1f83d9abfb41bd6b);
87         c->h[7]=U64(0x5be0cd19137e2179);
88
89         c->Nl=0;        c->Nh=0;
90         c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
91         return 1;
92         }
93
94 #ifndef SHA512_ASM
95 static
96 #endif
97 void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
98
99 int SHA512_Final (unsigned char *md, SHA512_CTX *c)
100         {
101         unsigned char *p=(unsigned char *)c->u.p;
102         size_t n=c->num;
103
104         p[n]=0x80;      /* There always is a room for one */
105         n++;
106         if (n > (sizeof(c->u)-16))
107                 memset (p+n,0,sizeof(c->u)-n), n=0,
108                 sha512_block_data_order (c,p,1);
109
110         memset (p+n,0,sizeof(c->u)-16-n);
111 #ifdef  B_ENDIAN
112         c->u.d[SHA_LBLOCK-2] = c->Nh;
113         c->u.d[SHA_LBLOCK-1] = c->Nl;
114 #else
115         p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
116         p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
117         p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
118         p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
119         p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
120         p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
121         p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
122         p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
123         p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
124         p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
125         p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
126         p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
127         p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
128         p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
129         p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
130         p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
131 #endif
132
133         sha512_block_data_order (c,p,1);
134
135         if (md==0) return 0;
136
137         switch (c->md_len)
138                 {
139                 /* Let compiler decide if it's appropriate to unroll... */
140                 case SHA384_DIGEST_LENGTH:
141                         for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
142                                 {
143                                 SHA_LONG64 t = c->h[n];
144
145                                 *(md++) = (unsigned char)(t>>56);
146                                 *(md++) = (unsigned char)(t>>48);
147                                 *(md++) = (unsigned char)(t>>40);
148                                 *(md++) = (unsigned char)(t>>32);
149                                 *(md++) = (unsigned char)(t>>24);
150                                 *(md++) = (unsigned char)(t>>16);
151                                 *(md++) = (unsigned char)(t>>8);
152                                 *(md++) = (unsigned char)(t);
153                                 }
154                         break;
155                 case SHA512_DIGEST_LENGTH:
156                         for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
157                                 {
158                                 SHA_LONG64 t = c->h[n];
159
160                                 *(md++) = (unsigned char)(t>>56);
161                                 *(md++) = (unsigned char)(t>>48);
162                                 *(md++) = (unsigned char)(t>>40);
163                                 *(md++) = (unsigned char)(t>>32);
164                                 *(md++) = (unsigned char)(t>>24);
165                                 *(md++) = (unsigned char)(t>>16);
166                                 *(md++) = (unsigned char)(t>>8);
167                                 *(md++) = (unsigned char)(t);
168                                 }
169                         break;
170                 /* ... as well as make sure md_len is not abused. */
171                 default:        return 0;
172                 }
173
174         return 1;
175         }
176
177 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
178 {   return SHA512_Final (md,c);   }
179
180 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
181         {
182         SHA_LONG64      l;
183         unsigned char  *p=c->u.p;
184         const unsigned char *data=(const unsigned char *)_data;
185
186         if (len==0) return  1;
187
188         l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
189         if (l < c->Nl)          c->Nh++;
190         if (sizeof(len)>=8)     c->Nh+=(((SHA_LONG64)len)>>61);
191         c->Nl=l;
192
193         if (c->num != 0)
194                 {
195                 size_t n = sizeof(c->u) - c->num;
196
197                 if (len < n)
198                         {
199                         memcpy (p+c->num,data,len), c->num += (unsigned int)len;
200                         return 1;
201                         }
202                 else    {
203                         memcpy (p+c->num,data,n), c->num = 0;
204                         len-=n, data+=n;
205                         sha512_block_data_order (c,p,1);
206                         }
207                 }
208
209         if (len >= sizeof(c->u))
210                 {
211 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
212                 if ((size_t)data%sizeof(c->u.d[0]) != 0)
213                         while (len >= sizeof(c->u))
214                                 memcpy (p,data,sizeof(c->u)),
215                                 sha512_block_data_order (c,p,1),
216                                 len  -= sizeof(c->u),
217                                 data += sizeof(c->u);
218                 else
219 #endif
220                         sha512_block_data_order (c,data,len/sizeof(c->u)),
221                         data += len,
222                         len  %= sizeof(c->u),
223                         data -= len;
224                 }
225
226         if (len != 0)   memcpy (p,data,len), c->num = (int)len;
227
228         return 1;
229         }
230
231 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
232 {   return SHA512_Update (c,data,len);   }
233
234 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
235 {   sha512_block_data_order (c,data,1);  }
236
237 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
238         {
239         SHA512_CTX c;
240         static unsigned char m[SHA384_DIGEST_LENGTH];
241
242         if (md == NULL) md=m;
243         SHA384_Init(&c);
244         SHA512_Update(&c,d,n);
245         SHA512_Final(md,&c);
246         OPENSSL_cleanse(&c,sizeof(c));
247         return(md);
248         }
249
250 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
251         {
252         SHA512_CTX c;
253         static unsigned char m[SHA512_DIGEST_LENGTH];
254
255         if (md == NULL) md=m;
256         SHA512_Init(&c);
257         SHA512_Update(&c,d,n);
258         SHA512_Final(md,&c);
259         OPENSSL_cleanse(&c,sizeof(c));
260         return(md);
261         }
262
263 #ifndef SHA512_ASM
264 static const SHA_LONG64 K512[80] = {
265         U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
266         U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
267         U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
268         U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
269         U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
270         U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
271         U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
272         U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
273         U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
274         U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
275         U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
276         U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
277         U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
278         U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
279         U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
280         U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
281         U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
282         U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
283         U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
284         U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
285         U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
286         U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
287         U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
288         U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
289         U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
290         U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
291         U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
292         U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
293         U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
294         U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
295         U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
296         U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
297         U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
298         U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
299         U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
300         U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
301         U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
302         U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
303         U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
304         U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
305
/*
 * Compiler- and CPU-specific versions of two primitives:
 *
 *   ROTR(a,n)  - rotate the 64-bit value a right by n bits;
 *   PULL64(x)  - load the 64-bit big-endian value stored at &x.
 *
 * Whatever is not defined here is supplied by the portable definitions
 * that follow this section.  Building with PEDANTIC disables all of it.
 */
#ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#  if defined(__x86_64) || defined(__x86_64__)
/* x86_64: rotate with rorq; n goes through the "J" immediate constraint. */
#   define ROTR(a,n)    ({ SHA_LONG64 rot_;                     \
                           asm ("rorq %1,%0"                    \
                                : "=r"(rot_)                    \
                                : "J"(n),"0"(a)                 \
                                : "cc");                        \
                           rot_;                                })
#   if !defined(B_ENDIAN)
/* x86_64: load the 64-bit word as-is, then byte-swap it with bswapq. */
#    define PULL64(x)   ({ SHA_LONG64 word_=*((const SHA_LONG64 *)(&(x)));     \
                           asm ("bswapq    %0"                  \
                                : "=r"(word_)                   \
                                : "0"(word_));                  \
                           word_;                               })
#   endif
#  elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#   if defined(I386_ONLY)
/* I386_ONLY: swap bytes with xchgb/roll instead of the bswapl instruction. */
#    define PULL64(x)   ({ const unsigned int *half_=(const unsigned int *)(&(x));\
                           unsigned int hi_=half_[0],lo_=half_[1];      \
                           asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                               "roll $16,%%eax; roll $16,%%edx; "\
                               "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
                                : "=a"(lo_),"=d"(hi_)           \
                                : "0"(lo_),"1"(hi_) : "cc");    \
                           ((SHA_LONG64)hi_)<<32|lo_;           })
#   else
/* 32-bit x86: byte-swap each 32-bit half with bswapl, then combine. */
#    define PULL64(x)   ({ const unsigned int *half_=(const unsigned int *)(&(x));\
                           unsigned int hi_=half_[0],lo_=half_[1];      \
                           asm ("bswapl %0; bswapl %1;"         \
                                : "=r"(lo_),"=r"(hi_)           \
                                : "0"(lo_),"1"(hi_));           \
                           ((SHA_LONG64)hi_)<<32|lo_;           })
#   endif
#  elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
/* 64-bit PowerPC: rotate with rotrdi. */
#   define ROTR(a,n)    ({ SHA_LONG64 rot_;                     \
                           asm ("rotrdi %0,%1,%2"               \
                                : "=r"(rot_)                    \
                                : "r"(a),"K"(n));               \
                           rot_;                                })
#  endif
# elif defined(_MSC_VER)
#  if defined(_WIN64)   /* applies to both IA-64 and AMD64 */
#   pragma intrinsic(_rotr64)
#   define ROTR(a,n)    _rotr64((a),n)
#  endif
#  if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
/*
 * 32-bit MSVC: __pull64be reads its argument through ECX and leaves the
 * byte-swapped halves in EDX:EAX; there is no explicit return statement.
 */
#   if defined(I386_ONLY)
    static SHA_LONG64 __fastcall __pull64be(const void *x)
    {
        _asm    mov     edx, [ecx + 0]
        _asm    mov     eax, [ecx + 4]
        _asm    xchg    dh,dl
        _asm    xchg    ah,al
        _asm    rol     edx,16
        _asm    rol     eax,16
        _asm    xchg    dh,dl
        _asm    xchg    ah,al
    }
#   else
    static SHA_LONG64 __fastcall __pull64be(const void *x)
    {
        _asm    mov     edx, [ecx + 0]
        _asm    mov     eax, [ecx + 4]
        _asm    bswap   edx
        _asm    bswap   eax
    }
#   endif
#   define PULL64(x) __pull64be(&(x))
/* NOTE(review): inlining is turned off for MSVC <= 1200; reason not stated here. */
#   if _MSC_VER<=1200
#    pragma inline_depth(0)
#   endif
#  endif
# endif
#endif
376
/*
 * Portable fallbacks, used when no compiler-specific version was defined.
 *
 * B(x,j)     - byte j (0 = first in memory) of the object x, widened to
 *              SHA_LONG64 and shifted to its big-endian position.
 * PULL64(x)  - the 64-bit big-endian value stored at &x, assembled from
 *              eight byte reads, so it works at any address and on any
 *              host byte order.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. PULL64(*p) or ROTR(v, a+b)) expand as intended.
 */
#ifndef PULL64
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#endif

/*
 * ROTR(x,s) - rotate the 64-bit unsigned value x right by s bits.
 * s must be in the range 1..63: s == 0 would shift left by 64.
 */
#ifndef ROTR
#define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
#endif

/* The four mixing functions built from rotates and shifts. */
#define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
#define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
#define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

/* Ch: bitwise "x selects y or z".  Maj: bitwise majority of x, y, z. */
#define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
#define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
393
394
395 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
396 /*
397  * This code should give better results on 32-bit CPU with less than
398  * ~24 registers, both size and performance wise...
399  */
400 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
401         {
402         const SHA_LONG64 *W=in;
403         SHA_LONG64      A,E,T;
404         SHA_LONG64      X[9+80],*F;
405         int i;
406
407                         while (num--) {
408
409         F    = X+80;
410         A    = ctx->h[0];       F[1] = ctx->h[1];
411         F[2] = ctx->h[2];       F[3] = ctx->h[3];
412         E    = ctx->h[4];       F[5] = ctx->h[5];
413         F[6] = ctx->h[6];       F[7] = ctx->h[7];
414
415         for (i=0;i<16;i++,F--)
416                 {
417 #ifdef B_ENDIAN
418                 T = W[i];
419 #else
420                 T = PULL64(W[i]);
421 #endif
422                 F[0] = A;
423                 F[4] = E;
424                 F[8] = T;
425                 T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
426                 E    = F[3] + T;
427                 A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
428                 }
429
430         for (;i<80;i++,F--)
431                 {
432                 T    = sigma0(F[8+16-1]);
433                 T   += sigma1(F[8+16-14]);
434                 T   += F[8+16] + F[8+16-9];
435
436                 F[0] = A;
437                 F[4] = E;
438                 F[8] = T;
439                 T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
440                 E    = F[3] + T;
441                 A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
442                 }
443
444         ctx->h[0] += A;         ctx->h[1] += F[1];
445         ctx->h[2] += F[2];      ctx->h[3] += F[3];
446         ctx->h[4] += E;         ctx->h[5] += F[5];
447         ctx->h[6] += F[6];      ctx->h[7] += F[7];
448
449                         W+=SHA_LBLOCK;
450                         }
451         }
452
453 #elif defined(OPENSSL_SMALL_FOOTPRINT)
454
455 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
456         {
457         const SHA_LONG64 *W=in;
458         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1,T2;
459         SHA_LONG64      X[16];
460         int i;
461
462                         while (num--) {
463
464         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
465         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
466
467         for (i=0;i<16;i++)
468                 {
469 #ifdef B_ENDIAN
470                 T1 = X[i] = W[i];
471 #else
472                 T1 = X[i] = PULL64(W[i]);
473 #endif
474                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
475                 T2 = Sigma0(a) + Maj(a,b,c);
476                 h = g;  g = f;  f = e;  e = d + T1;
477                 d = c;  c = b;  b = a;  a = T1 + T2;
478                 }
479
480         for (;i<80;i++)
481                 {
482                 s0 = X[(i+1)&0x0f];     s0 = sigma0(s0);
483                 s1 = X[(i+14)&0x0f];    s1 = sigma1(s1);
484
485                 T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
486                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
487                 T2 = Sigma0(a) + Maj(a,b,c);
488                 h = g;  g = f;  f = e;  e = d + T1;
489                 d = c;  c = b;  b = a;  a = T1 + T2;
490                 }
491
492         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
493         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
494
495                         W+=SHA_LBLOCK;
496                         }
497         }
498
499 #else
500
501 #define ROUND_00_15(i,a,b,c,d,e,f,g,h)          do {    \
502         T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
503         h = Sigma0(a) + Maj(a,b,c);                     \
504         d += T1;        h += T1;                } while (0)
505
506 #define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)      do {    \
507         s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
508         s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
509         T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
510         ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
511
512 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
513         {
514         const SHA_LONG64 *W=in;
515         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1;
516         SHA_LONG64      X[16];
517         int i;
518
519                         while (num--) {
520
521         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
522         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
523
524 #ifdef B_ENDIAN
525         T1 = X[0] = W[0];       ROUND_00_15(0,a,b,c,d,e,f,g,h);
526         T1 = X[1] = W[1];       ROUND_00_15(1,h,a,b,c,d,e,f,g);
527         T1 = X[2] = W[2];       ROUND_00_15(2,g,h,a,b,c,d,e,f);
528         T1 = X[3] = W[3];       ROUND_00_15(3,f,g,h,a,b,c,d,e);
529         T1 = X[4] = W[4];       ROUND_00_15(4,e,f,g,h,a,b,c,d);
530         T1 = X[5] = W[5];       ROUND_00_15(5,d,e,f,g,h,a,b,c);
531         T1 = X[6] = W[6];       ROUND_00_15(6,c,d,e,f,g,h,a,b);
532         T1 = X[7] = W[7];       ROUND_00_15(7,b,c,d,e,f,g,h,a);
533         T1 = X[8] = W[8];       ROUND_00_15(8,a,b,c,d,e,f,g,h);
534         T1 = X[9] = W[9];       ROUND_00_15(9,h,a,b,c,d,e,f,g);
535         T1 = X[10] = W[10];     ROUND_00_15(10,g,h,a,b,c,d,e,f);
536         T1 = X[11] = W[11];     ROUND_00_15(11,f,g,h,a,b,c,d,e);
537         T1 = X[12] = W[12];     ROUND_00_15(12,e,f,g,h,a,b,c,d);
538         T1 = X[13] = W[13];     ROUND_00_15(13,d,e,f,g,h,a,b,c);
539         T1 = X[14] = W[14];     ROUND_00_15(14,c,d,e,f,g,h,a,b);
540         T1 = X[15] = W[15];     ROUND_00_15(15,b,c,d,e,f,g,h,a);
541 #else
542         T1 = X[0]  = PULL64(W[0]);      ROUND_00_15(0,a,b,c,d,e,f,g,h);
543         T1 = X[1]  = PULL64(W[1]);      ROUND_00_15(1,h,a,b,c,d,e,f,g);
544         T1 = X[2]  = PULL64(W[2]);      ROUND_00_15(2,g,h,a,b,c,d,e,f);
545         T1 = X[3]  = PULL64(W[3]);      ROUND_00_15(3,f,g,h,a,b,c,d,e);
546         T1 = X[4]  = PULL64(W[4]);      ROUND_00_15(4,e,f,g,h,a,b,c,d);
547         T1 = X[5]  = PULL64(W[5]);      ROUND_00_15(5,d,e,f,g,h,a,b,c);
548         T1 = X[6]  = PULL64(W[6]);      ROUND_00_15(6,c,d,e,f,g,h,a,b);
549         T1 = X[7]  = PULL64(W[7]);      ROUND_00_15(7,b,c,d,e,f,g,h,a);
550         T1 = X[8]  = PULL64(W[8]);      ROUND_00_15(8,a,b,c,d,e,f,g,h);
551         T1 = X[9]  = PULL64(W[9]);      ROUND_00_15(9,h,a,b,c,d,e,f,g);
552         T1 = X[10] = PULL64(W[10]);     ROUND_00_15(10,g,h,a,b,c,d,e,f);
553         T1 = X[11] = PULL64(W[11]);     ROUND_00_15(11,f,g,h,a,b,c,d,e);
554         T1 = X[12] = PULL64(W[12]);     ROUND_00_15(12,e,f,g,h,a,b,c,d);
555         T1 = X[13] = PULL64(W[13]);     ROUND_00_15(13,d,e,f,g,h,a,b,c);
556         T1 = X[14] = PULL64(W[14]);     ROUND_00_15(14,c,d,e,f,g,h,a,b);
557         T1 = X[15] = PULL64(W[15]);     ROUND_00_15(15,b,c,d,e,f,g,h,a);
558 #endif
559
560         for (i=16;i<80;i+=16)
561                 {
562                 ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X);
563                 ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X);
564                 ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X);
565                 ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X);
566                 ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X);
567                 ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X);
568                 ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X);
569                 ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X);
570                 ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X);
571                 ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X);
572                 ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X);
573                 ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X);
574                 ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X);
575                 ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X);
576                 ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X);
577                 ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X);
578                 }
579
580         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
581         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
582
583                         W+=SHA_LBLOCK;
584                         }
585         }
586
587 #endif
588
589 #endif /* SHA512_ASM */
590
591 #else /* !OPENSSL_NO_SHA512 */
592
/*
 * With SHA-512 compiled out, give this file one self-referencing static
 * object on PEDANTIC, DEC C and Mac OS X builds.
 * NOTE(review): presumably avoids an empty translation unit - confirm.
 */
#if defined(OPENSSL_SYS_MACOSX) || defined(__DECC) || defined(PEDANTIC)
static void *dummy = &dummy;
#endif
596
597 #endif /* !OPENSSL_NO_SHA512 */