Typo in commit #16187.
[openssl.git] / crypto / sha / sha512.c
1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3  * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
4  * according to the OpenSSL license [found in ../../LICENSE].
5  * ====================================================================
6  */
7 #include <openssl/opensslconf.h>
8 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
9 /*
10  * IMPLEMENTATION NOTES.
11  *
12  * As you might have noticed 32-bit hash algorithms:
13  *
14  * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
15  * - optimized versions implement two transform functions: one operating
16  *   on [aligned] data in host byte order and one - on data in input
17  *   stream byte order;
18  * - share common byte-order neutral collector and padding function
19  *   implementations, ../md32_common.h;
20  *
21  * None of the above applies to this SHA-512 implementation. The reasons
22  * [in reverse order] are:
23  *
24  * - it's the only 64-bit hash algorithm at the time of this writing,
25  *   there is no need for common collector/padding implementation [yet];
26  * - by supporting only one transform function [which operates on
27  *   *aligned* data in input stream byte order, big-endian in this case]
28  *   we minimize burden of maintenance in two ways: a) collector/padding
29  *   function is simpler; b) only one transform function to stare at;
30  * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
31  *   apply a number of optimizations to mitigate potential performance
32  *   penalties caused by previous design decision;
33  *
34  * Caveat lector.
35  *
36  * Implementation relies on the fact that "long long" is 64-bit on
37  * both 32- and 64-bit platforms. If some compiler vendor comes up
38  * with 128-bit long long, adjustment to sha.h would be required.
39  * As this implementation relies on 64-bit integer type, it's totally
40  * inappropriate for platforms which don't support it, most notably
41  * 16-bit platforms.
42  *                                      <appro@fy.chalmers.se>
43  */
44 #include <stdlib.h>
45 #include <string.h>
46
47 #include <openssl/crypto.h>
48 #include <openssl/sha.h>
49 #include <openssl/opensslv.h>
50
51 #include "cryptlib.h"
52
/* Identification string: the literal "SHA-512" followed by OPENSSL_VERSION_PTEXT. */
const char SHA512_version[]="SHA-512" OPENSSL_VERSION_PTEXT;
54
/*
 * Defined for x86, x86-64 and s390 targets. When it is defined,
 * SHA512_Update() skips its alignment check and hands caller-supplied data
 * directly to sha512_block_data_order(); otherwise unaligned input is first
 * copied into the context buffer.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
    defined(__s390__) || defined(__s390x__)
#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif
60
61 int SHA384_Init (SHA512_CTX *c)
62         {
63         c->h[0]=U64(0xcbbb9d5dc1059ed8);
64         c->h[1]=U64(0x629a292a367cd507);
65         c->h[2]=U64(0x9159015a3070dd17);
66         c->h[3]=U64(0x152fecd8f70e5939);
67         c->h[4]=U64(0x67332667ffc00b31);
68         c->h[5]=U64(0x8eb44a8768581511);
69         c->h[6]=U64(0xdb0c2e0d64f98fa7);
70         c->h[7]=U64(0x47b5481dbefa4fa4);
71         c->Nl=0;        c->Nh=0;
72         c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
73         return 1;
74         }
75
76 int SHA512_Init (SHA512_CTX *c)
77         {
78         c->h[0]=U64(0x6a09e667f3bcc908);
79         c->h[1]=U64(0xbb67ae8584caa73b);
80         c->h[2]=U64(0x3c6ef372fe94f82b);
81         c->h[3]=U64(0xa54ff53a5f1d36f1);
82         c->h[4]=U64(0x510e527fade682d1);
83         c->h[5]=U64(0x9b05688c2b3e6c1f);
84         c->h[6]=U64(0x1f83d9abfb41bd6b);
85         c->h[7]=U64(0x5be0cd19137e2179);
86         c->Nl=0;        c->Nh=0;
87         c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
88         return 1;
89         }
90
/*
 * Forward declaration of the block transform: processes `num` consecutive
 * input blocks starting at `in` and updates ctx->h. It has internal linkage
 * when the C implementation in this file is used (SHA512_ASM not defined);
 * with SHA512_ASM the definition is expected to come from outside this file.
 */
#ifndef SHA512_ASM
static
#endif
void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
95
96 int SHA512_Final (unsigned char *md, SHA512_CTX *c)
97         {
98         unsigned char *p=(unsigned char *)c->u.p;
99         size_t n=c->num;
100
101         p[n]=0x80;      /* There always is a room for one */
102         n++;
103         if (n > (sizeof(c->u)-16))
104                 memset (p+n,0,sizeof(c->u)-n), n=0,
105                 sha512_block_data_order (c,p,1);
106
107         memset (p+n,0,sizeof(c->u)-16-n);
108 #ifdef  B_ENDIAN
109         c->u.d[SHA_LBLOCK-2] = c->Nh;
110         c->u.d[SHA_LBLOCK-1] = c->Nl;
111 #else
112         p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
113         p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
114         p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
115         p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
116         p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
117         p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
118         p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
119         p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
120         p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
121         p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
122         p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
123         p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
124         p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
125         p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
126         p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
127         p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
128 #endif
129
130         sha512_block_data_order (c,p,1);
131
132         if (md==0) return 0;
133
134         switch (c->md_len)
135                 {
136                 /* Let compiler decide if it's appropriate to unroll... */
137                 case SHA384_DIGEST_LENGTH:
138                         for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
139                                 {
140                                 SHA_LONG64 t = c->h[n];
141
142                                 *(md++) = (unsigned char)(t>>56);
143                                 *(md++) = (unsigned char)(t>>48);
144                                 *(md++) = (unsigned char)(t>>40);
145                                 *(md++) = (unsigned char)(t>>32);
146                                 *(md++) = (unsigned char)(t>>24);
147                                 *(md++) = (unsigned char)(t>>16);
148                                 *(md++) = (unsigned char)(t>>8);
149                                 *(md++) = (unsigned char)(t);
150                                 }
151                         break;
152                 case SHA512_DIGEST_LENGTH:
153                         for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
154                                 {
155                                 SHA_LONG64 t = c->h[n];
156
157                                 *(md++) = (unsigned char)(t>>56);
158                                 *(md++) = (unsigned char)(t>>48);
159                                 *(md++) = (unsigned char)(t>>40);
160                                 *(md++) = (unsigned char)(t>>32);
161                                 *(md++) = (unsigned char)(t>>24);
162                                 *(md++) = (unsigned char)(t>>16);
163                                 *(md++) = (unsigned char)(t>>8);
164                                 *(md++) = (unsigned char)(t);
165                                 }
166                         break;
167                 /* ... as well as make sure md_len is not abused. */
168                 default:        return 0;
169                 }
170
171         return 1;
172         }
173
174 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
175 {   return SHA512_Final (md,c);   }
176
177 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
178         {
179         SHA_LONG64      l;
180         unsigned char  *p=c->u.p;
181         const unsigned char *data=(const unsigned char *)_data;
182
183         if (len==0) return  1;
184
185         l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
186         if (l < c->Nl)          c->Nh++;
187         if (sizeof(len)>=8)     c->Nh+=(((SHA_LONG64)len)>>61);
188         c->Nl=l;
189
190         if (c->num != 0)
191                 {
192                 size_t n = sizeof(c->u) - c->num;
193
194                 if (len < n)
195                         {
196                         memcpy (p+c->num,data,len), c->num += len;
197                         return 1;
198                         }
199                 else    {
200                         memcpy (p+c->num,data,n), c->num = 0;
201                         len-=n, data+=n;
202                         sha512_block_data_order (c,p,1);
203                         }
204                 }
205
206         if (len >= sizeof(c->u))
207                 {
208 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
209                 if ((size_t)data%sizeof(c->u.d[0]) != 0)
210                         while (len >= sizeof(c->u))
211                                 memcpy (p,data,sizeof(c->u)),
212                                 sha512_block_data_order (c,p,1),
213                                 len  -= sizeof(c->u),
214                                 data += sizeof(c->u);
215                 else
216 #endif
217                         sha512_block_data_order (c,data,len/sizeof(c->u)),
218                         data += len,
219                         len  %= sizeof(c->u),
220                         data -= len;
221                 }
222
223         if (len != 0)   memcpy (p,data,len), c->num = (int)len;
224
225         return 1;
226         }
227
228 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
229 {   return SHA512_Update (c,data,len);   }
230
231 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
232 {   sha512_block_data_order (c,data,1);  }
233
234 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
235         {
236         SHA512_CTX c;
237         static unsigned char m[SHA384_DIGEST_LENGTH];
238
239         if (md == NULL) md=m;
240         SHA384_Init(&c);
241         SHA512_Update(&c,d,n);
242         SHA512_Final(md,&c);
243         OPENSSL_cleanse(&c,sizeof(c));
244         return(md);
245         }
246
247 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
248         {
249         SHA512_CTX c;
250         static unsigned char m[SHA512_DIGEST_LENGTH];
251
252         if (md == NULL) md=m;
253         SHA512_Init(&c);
254         SHA512_Update(&c,d,n);
255         SHA512_Final(md,&c);
256         OPENSSL_cleanse(&c,sizeof(c));
257         return(md);
258         }
259
260 #ifndef SHA512_ASM
/*
 * Per-round additive constants: round i of sha512_block_data_order() adds
 * K512[i], for i = 0..79.
 * NOTE(review): presumably the SHA-384/512 round constants from FIPS 180;
 * verify against the standard before touching any value.
 */
static const SHA_LONG64 K512[80] = {
        U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
        U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
        U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
        U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
        U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
        U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
        U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
        U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
        U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
        U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
        U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
        U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
        U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
        U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
        U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
        U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
        U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
        U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
        U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
        U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
        U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
        U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
        U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
        U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
        U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
        U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
        U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
        U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
        U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
        U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
        U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
        U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
        U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
        U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
        U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
        U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
        U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
        U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
        U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
        U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
302
/*
 * Optional compiler/CPU specific versions of two primitives:
 *
 *   ROTR(a,n)  - rotate the 64-bit value a right by the constant n;
 *   PULL64(x)  - load the 64-bit big-endian value stored at &x.
 *
 * Whatever is left undefined here gets a portable C fallback further down.
 * Defining PEDANTIC disables all of these variants.
 *
 * The ROTR temporaries are SHA_LONG64 rather than "unsigned long": the
 * latter is only 32 bits wide on LLP64 targets, which would truncate the
 * rotated value.
 */
#ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#  if defined(__x86_64) || defined(__x86_64__)
/* GCC, x86-64: single-instruction rotate; "cc" because rorq alters flags. */
#   define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                asm ("rorq %1,%0"       \
                                : "=r"(ret)             \
                                : "J"(n),"0"(a)         \
                                : "cc"); ret;           })
#   if !defined(B_ENDIAN)
/* GCC, x86-64: plain 64-bit load followed by a byte swap. */
#    define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
                                asm ("bswapq    %0"             \
                                : "=r"(ret)                     \
                                : "0"(ret)); ret;               })
#   endif
#  elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#   if defined(I386_ONLY)
/* GCC, ia32 without bswap: swap each 32-bit half with xchgb/roll. */
#    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                        unsigned int hi,lo;                     \
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                    "roll $16,%%eax; roll $16,%%edx; "\
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
                                : "=a"(lo),"=d"(hi)             \
                                : "0"(p[1]),"1"(p[0]) : "cc");  \
                                ((SHA_LONG64)hi)<<32|lo;        })
#   else
/* GCC, ia32 with bswap: swap each 32-bit half, then recombine. */
#    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                        unsigned int hi,lo;                     \
                                asm ("bswapl %0; bswapl %1;"    \
                                : "=r"(lo),"=r"(hi)             \
                                : "0"(p[1]),"1"(p[0]));         \
                                ((SHA_LONG64)hi)<<32|lo;        })
#   endif
#  elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
/* GCC, 64-bit PowerPC: single-instruction rotate. */
#   define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                asm ("rotrdi %0,%1,%2"  \
                                : "=r"(ret)             \
                                : "r"(a),"K"(n)); ret;  })
#  endif
# elif defined(_MSC_VER)
#  if defined(_WIN64)   /* applies to both IA-64 and AMD64 */
#   define ROTR(a,n)    _rotr64((a),n)
#  endif
#  if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
/*
 * MSVC, ia32: __fastcall passes x in ecx; the byte-swapped halves are left
 * in edx:eax, which is where a 64-bit return value is picked up.
 */
#   if defined(I386_ONLY)
    static SHA_LONG64 __fastcall __pull64be(const void *x)
    {   _asm    mov     edx, [ecx + 0]
        _asm    mov     eax, [ecx + 4]
        _asm    xchg    dh,dl
        _asm    xchg    ah,al
        _asm    rol     edx,16
        _asm    rol     eax,16
        _asm    xchg    dh,dl
        _asm    xchg    ah,al
    }
#   else
    static SHA_LONG64 __fastcall __pull64be(const void *x)
    {   _asm    mov     edx, [ecx + 0]
        _asm    mov     eax, [ecx + 4]
        _asm    bswap   edx
        _asm    bswap   eax
    }
#   endif
#   define PULL64(x) __pull64be(&(x))
#  endif
# endif
#endif
369
/*
 * Portable fallbacks, used when no platform-specific version was selected.
 *
 * B(x,j)     - byte j (0 = first in memory) of the object x, moved to its
 *              position in a big-endian 64-bit value;
 * PULL64(x)  - the 64-bit big-endian value stored at &x, assembled byte by
 *              byte so that it is independent of host byte order;
 * ROTR(x,s)  - rotate the 64-bit value x right by s bits, 0 < s < 64.
 *
 * Every macro argument is parenthesized so that expression arguments such
 * as ROTR(v, a+b) expand as intended. Note that arguments are evaluated
 * more than once, so they must be free of side effects.
 */
#ifndef PULL64
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#endif

#ifndef ROTR
#define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
#endif
378
/*
 * Mixing functions used by the rounds; all operands are expected to be
 * 64-bit (SHA_LONG64) values.
 *
 * Sigma0/Sigma1 - XOR of three right-rotations of x;
 * sigma0/sigma1 - XOR of two right-rotations and one right-shift of x.
 */
#define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
#define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
#define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

/* Ch: for each bit position take y where x is set, z where x is clear. */
#define Ch(x,y,z)       ((z) ^ ((x) & ((y) ^ (z))))
/* Maj: for each bit position the majority value among x, y and z. */
#define Maj(x,y,z)      (((x) & (y)) | ((z) & ((x) | (y))))
386
/*
 * Optional dispatch to an external SSE2 block routine, available only when
 * OPENSSL_IA32_SSE2 is defined and neither OPENSSL_NO_ASM nor I386_ONLY is.
 *
 * Intended to be expanded at the top of a void function: if bit 26 of
 * OPENSSL_ia32cap_P is clear the macro does nothing; otherwise it calls
 * sha512_block_sse2(ctx->h,in,num) and *returns from the enclosing
 * function*, so the C rounds that follow are skipped.
 * NOTE(review): bit 26 is presumably the CPUID SSE2 feature flag - confirm
 * against the OPENSSL_ia32cap_P definition.
 */
#if defined(OPENSSL_IA32_SSE2) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY)
#define GO_FOR_SSE2(ctx,in,num)         do {            \
        void    sha512_block_sse2(void *,const void *,size_t);  \
        if (!(OPENSSL_ia32cap_P & (1<<26))) break;      \
        sha512_block_sse2(ctx->h,in,num); return;       \
                                        } while (0)
#endif
394
395 #ifdef OPENSSL_SMALL_FOOTPRINT
396
397 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
398         {
399         const SHA_LONG64 *W=in;
400         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1,T2;
401         SHA_LONG64      X[16];
402         int i;
403
404 #ifdef GO_FOR_SSE2
405         GO_FOR_SSE2(ctx,in,num);
406 #endif
407
408                         while (num--) {
409
410         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
411         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
412
413         for (i=0;i<16;i++)
414                 {
415 #ifdef B_ENDIAN
416                 T1 = X[i] = W[i];
417 #else
418                 T1 = X[i] = PULL64(W[i]);
419 #endif
420                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
421                 T2 = Sigma0(a) + Maj(a,b,c);
422                 h = g;  g = f;  f = e;  e = d + T1;
423                 d = c;  c = b;  b = a;  a = T1 + T2;
424                 }
425
426         for (;i<80;i++)
427                 {
428                 s0 = X[(i+1)&0x0f];     s0 = sigma0(s0);
429                 s1 = X[(i+14)&0x0f];    s1 = sigma1(s1);
430
431                 T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
432                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
433                 T2 = Sigma0(a) + Maj(a,b,c);
434                 h = g;  g = f;  f = e;  e = d + T1;
435                 d = c;  c = b;  b = a;  a = T1 + T2;
436                 }
437
438         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
439         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
440
441                         W+=SHA_LBLOCK;
442                         }
443         }
444
445 #else
446
/*
 * ROUND_00_15: one round of the unrolled transform.
 *
 * Expects T1 (a variable of the enclosing function) to already hold the
 * message schedule word for round i. Adds h, Sigma1(e), Ch(e,f,g) and
 * K512[i] to T1, then overwrites h with Sigma0(a)+Maj(a,b,c)+T1 and adds T1
 * to d. The working variables are not shuffled here; callers instead rotate
 * the order in which they pass a..h from one round to the next.
 */
#define ROUND_00_15(i,a,b,c,d,e,f,g,h)          do {    \
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
        h = Sigma0(a) + Maj(a,b,c);                     \
        d += T1;        h += T1;                } while (0)

/*
 * ROUND_16_80: extends the message schedule held in the 16-word circular
 * buffer X for round i (updating X[i&0x0f] in place and loading it into
 * T1), then performs ROUND_00_15. Uses s0, s1 and T1 from the enclosing
 * function.
 */
#define ROUND_16_80(i,a,b,c,d,e,f,g,h,X)        do {    \
        s0 = X[(i+1)&0x0f];     s0 = sigma0(s0);        \
        s1 = X[(i+14)&0x0f];    s1 = sigma1(s1);        \
        T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f];    \
        ROUND_00_15(i,a,b,c,d,e,f,g,h);         } while (0)
457
458 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
459         {
460         const SHA_LONG64 *W=in;
461         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1;
462         SHA_LONG64      X[16];
463         int i;
464
465 #ifdef GO_FOR_SSE2
466         GO_FOR_SSE2(ctx,in,num);
467 #endif
468
469                         while (num--) {
470
471         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
472         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
473
474 #ifdef B_ENDIAN
475         T1 = X[0] = W[0];       ROUND_00_15(0,a,b,c,d,e,f,g,h);
476         T1 = X[1] = W[1];       ROUND_00_15(1,h,a,b,c,d,e,f,g);
477         T1 = X[2] = W[2];       ROUND_00_15(2,g,h,a,b,c,d,e,f);
478         T1 = X[3] = W[3];       ROUND_00_15(3,f,g,h,a,b,c,d,e);
479         T1 = X[4] = W[4];       ROUND_00_15(4,e,f,g,h,a,b,c,d);
480         T1 = X[5] = W[5];       ROUND_00_15(5,d,e,f,g,h,a,b,c);
481         T1 = X[6] = W[6];       ROUND_00_15(6,c,d,e,f,g,h,a,b);
482         T1 = X[7] = W[7];       ROUND_00_15(7,b,c,d,e,f,g,h,a);
483         T1 = X[8] = W[8];       ROUND_00_15(8,a,b,c,d,e,f,g,h);
484         T1 = X[9] = W[9];       ROUND_00_15(9,h,a,b,c,d,e,f,g);
485         T1 = X[10] = W[10];     ROUND_00_15(10,g,h,a,b,c,d,e,f);
486         T1 = X[11] = W[11];     ROUND_00_15(11,f,g,h,a,b,c,d,e);
487         T1 = X[12] = W[12];     ROUND_00_15(12,e,f,g,h,a,b,c,d);
488         T1 = X[13] = W[13];     ROUND_00_15(13,d,e,f,g,h,a,b,c);
489         T1 = X[14] = W[14];     ROUND_00_15(14,c,d,e,f,g,h,a,b);
490         T1 = X[15] = W[15];     ROUND_00_15(15,b,c,d,e,f,g,h,a);
491 #else
492         T1 = X[0]  = PULL64(W[0]);      ROUND_00_15(0,a,b,c,d,e,f,g,h);
493         T1 = X[1]  = PULL64(W[1]);      ROUND_00_15(1,h,a,b,c,d,e,f,g);
494         T1 = X[2]  = PULL64(W[2]);      ROUND_00_15(2,g,h,a,b,c,d,e,f);
495         T1 = X[3]  = PULL64(W[3]);      ROUND_00_15(3,f,g,h,a,b,c,d,e);
496         T1 = X[4]  = PULL64(W[4]);      ROUND_00_15(4,e,f,g,h,a,b,c,d);
497         T1 = X[5]  = PULL64(W[5]);      ROUND_00_15(5,d,e,f,g,h,a,b,c);
498         T1 = X[6]  = PULL64(W[6]);      ROUND_00_15(6,c,d,e,f,g,h,a,b);
499         T1 = X[7]  = PULL64(W[7]);      ROUND_00_15(7,b,c,d,e,f,g,h,a);
500         T1 = X[8]  = PULL64(W[8]);      ROUND_00_15(8,a,b,c,d,e,f,g,h);
501         T1 = X[9]  = PULL64(W[9]);      ROUND_00_15(9,h,a,b,c,d,e,f,g);
502         T1 = X[10] = PULL64(W[10]);     ROUND_00_15(10,g,h,a,b,c,d,e,f);
503         T1 = X[11] = PULL64(W[11]);     ROUND_00_15(11,f,g,h,a,b,c,d,e);
504         T1 = X[12] = PULL64(W[12]);     ROUND_00_15(12,e,f,g,h,a,b,c,d);
505         T1 = X[13] = PULL64(W[13]);     ROUND_00_15(13,d,e,f,g,h,a,b,c);
506         T1 = X[14] = PULL64(W[14]);     ROUND_00_15(14,c,d,e,f,g,h,a,b);
507         T1 = X[15] = PULL64(W[15]);     ROUND_00_15(15,b,c,d,e,f,g,h,a);
508 #endif
509
510         for (i=16;i<80;i+=8)
511                 {
512                 ROUND_16_80(i+0,a,b,c,d,e,f,g,h,X);
513                 ROUND_16_80(i+1,h,a,b,c,d,e,f,g,X);
514                 ROUND_16_80(i+2,g,h,a,b,c,d,e,f,X);
515                 ROUND_16_80(i+3,f,g,h,a,b,c,d,e,X);
516                 ROUND_16_80(i+4,e,f,g,h,a,b,c,d,X);
517                 ROUND_16_80(i+5,d,e,f,g,h,a,b,c,X);
518                 ROUND_16_80(i+6,c,d,e,f,g,h,a,b,X);
519                 ROUND_16_80(i+7,b,c,d,e,f,g,h,a,X);
520                 }
521
522         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
523         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
524
525                         W+=SHA_LBLOCK;
526                         }
527         }
528
529 #endif
530
531 #endif /* SHA512_ASM */
532
533 #endif /* OPENSSL_NO_SHA512 */