Further synchronizations with md32_common.h update, consistent naming
[openssl.git] / crypto / sha / sha512.c
1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3  * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
4  * according to the OpenSSL license [found in ../../LICENSE].
5  * ====================================================================
6  */
7 #include <openssl/opensslconf.h>
8 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
9 /*
10  * IMPLEMENTATION NOTES.
11  *
12  * As you might have noticed, 32-bit hash algorithms:
13  *
14  * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
15  * - optimized versions implement two transform functions: one operating
16  *   on [aligned] data in host byte order and one - on data in input
17  *   stream byte order;
18  * - share common byte-order neutral collector and padding function
19  *   implementations, ../md32_common.h;
20  *
21  * None of the above applies to this SHA-512 implementation. Reasons
22  * [in reverse order] are:
23  *
24  * - it's the only 64-bit hash algorithm at the time of this writing,
25  *   there is no need for common collector/padding implementation [yet];
26  * - by supporting only one transform function [which operates on
27  *   *aligned* data in input stream byte order, big-endian in this case]
28  *   we minimize burden of maintenance in two ways: a) collector/padding
29  *   function is simpler; b) only one transform function to stare at;
30  * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
31  *   apply a number of optimizations to mitigate potential performance
32  *   penalties caused by previous design decision;
33  *
34  * Caveat lector.
35  *
36  * Implementation relies on the fact that "long long" is 64-bit on
37  * both 32- and 64-bit platforms. If some compiler vendor comes up
38  * with 128-bit long long, adjustment to sha.h would be required.
39  * As this implementation relies on 64-bit integer type, it's totally
40  * inappropriate for platforms which don't support it, most notably
41  * 16-bit platforms.
42  *                                      <appro@fy.chalmers.se>
43  */
44 #include <stdlib.h>
45 #include <string.h>
46
47 #include <openssl/crypto.h>
48 #include <openssl/sha.h>
49 #include <openssl/opensslv.h>
50
51 #include "cryptlib.h"
52
53 const char *SHA512_version="SHA-512" OPENSSL_VERSION_PTEXT;
54
55 #if defined(_M_IX86) || defined(_M_AMD64) || defined(__i386) || defined(__x86_64)
56 #define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
57 #endif
58
59 int SHA384_Init (SHA512_CTX *c)
60         {
61         c->h[0]=U64(0xcbbb9d5dc1059ed8);
62         c->h[1]=U64(0x629a292a367cd507);
63         c->h[2]=U64(0x9159015a3070dd17);
64         c->h[3]=U64(0x152fecd8f70e5939);
65         c->h[4]=U64(0x67332667ffc00b31);
66         c->h[5]=U64(0x8eb44a8768581511);
67         c->h[6]=U64(0xdb0c2e0d64f98fa7);
68         c->h[7]=U64(0x47b5481dbefa4fa4);
69         c->Nl=0;        c->Nh=0;
70         c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
71         return 1;
72         }
73
74 int SHA512_Init (SHA512_CTX *c)
75         {
76         c->h[0]=U64(0x6a09e667f3bcc908);
77         c->h[1]=U64(0xbb67ae8584caa73b);
78         c->h[2]=U64(0x3c6ef372fe94f82b);
79         c->h[3]=U64(0xa54ff53a5f1d36f1);
80         c->h[4]=U64(0x510e527fade682d1);
81         c->h[5]=U64(0x9b05688c2b3e6c1f);
82         c->h[6]=U64(0x1f83d9abfb41bd6b);
83         c->h[7]=U64(0x5be0cd19137e2179);
84         c->Nl=0;        c->Nh=0;
85         c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
86         return 1;
87         }
88
89 #ifndef SHA512_ASM
90 static
91 #endif
92 void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
93
94 int SHA512_Final (unsigned char *md, SHA512_CTX *c)
95         {
96         unsigned char *p=(unsigned char *)c->u.p;
97         size_t n=c->num;
98
99         p[n]=0x80;      /* There always is a room for one */
100         n++;
101         if (n > (sizeof(c->u)-16))
102                 memset (p+n,0,sizeof(c->u)-n), n=0,
103                 sha512_block_data_order (c,p,1);
104
105         memset (p+n,0,sizeof(c->u)-16-n);
106 #ifdef  B_ENDIAN
107         c->u.d[SHA_LBLOCK-2] = c->Nh;
108         c->u.d[SHA_LBLOCK-1] = c->Nl;
109 #else
110         p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
111         p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
112         p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
113         p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
114         p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
115         p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
116         p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
117         p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
118         p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
119         p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
120         p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
121         p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
122         p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
123         p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
124         p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
125         p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
126 #endif
127
128         sha512_block_data_order (c,p,1);
129
130         if (md==0) return 0;
131
132         switch (c->md_len)
133                 {
134                 /* Let compiler decide if it's appropriate to unroll... */
135                 case SHA384_DIGEST_LENGTH:
136                         for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
137                                 {
138                                 SHA_LONG64 t = c->h[n];
139
140                                 *(md++) = (unsigned char)(t>>56);
141                                 *(md++) = (unsigned char)(t>>48);
142                                 *(md++) = (unsigned char)(t>>40);
143                                 *(md++) = (unsigned char)(t>>32);
144                                 *(md++) = (unsigned char)(t>>24);
145                                 *(md++) = (unsigned char)(t>>16);
146                                 *(md++) = (unsigned char)(t>>8);
147                                 *(md++) = (unsigned char)(t);
148                                 }
149                         break;
150                 case SHA512_DIGEST_LENGTH:
151                         for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
152                                 {
153                                 SHA_LONG64 t = c->h[n];
154
155                                 *(md++) = (unsigned char)(t>>56);
156                                 *(md++) = (unsigned char)(t>>48);
157                                 *(md++) = (unsigned char)(t>>40);
158                                 *(md++) = (unsigned char)(t>>32);
159                                 *(md++) = (unsigned char)(t>>24);
160                                 *(md++) = (unsigned char)(t>>16);
161                                 *(md++) = (unsigned char)(t>>8);
162                                 *(md++) = (unsigned char)(t);
163                                 }
164                         break;
165                 /* ... as well as make sure md_len is not abused. */
166                 default:        return 0;
167                 }
168
169         return 1;
170         }
171
172 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
173 {   return SHA512_Final (md,c);   }
174
175 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
176         {
177         SHA_LONG64      l;
178         unsigned char  *p=c->u.p;
179         const unsigned char *data=(const unsigned char *)_data;
180
181         if (len==0) return  1;
182
183         l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
184         if (l < c->Nl)          c->Nh++;
185         if (sizeof(len)>=8)     c->Nh+=(((SHA_LONG64)len)>>61);
186         c->Nl=l;
187
188         if (c->num != 0)
189                 {
190                 size_t n = sizeof(c->u) - c->num;
191
192                 if (len < n)
193                         {
194                         memcpy (p+c->num,data,len), c->num += len;
195                         return 1;
196                         }
197                 else    {
198                         memcpy (p+c->num,data,n), c->num = 0;
199                         len-=n, data+=n;
200                         sha512_block_data_order (c,p,1);
201                         }
202                 }
203
204         if (len >= sizeof(c->u))
205                 {
206 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
207                 if ((size_t)data%sizeof(c->u.d[0]) != 0)
208                         while (len >= sizeof(c->u))
209                                 memcpy (p,data,sizeof(c->u)),
210                                 sha512_block_data_order (c,p,1),
211                                 len  -= sizeof(c->u),
212                                 data += sizeof(c->u);
213                 else
214 #endif
215                         sha512_block_data_order (c,data,len/sizeof(c->u)),
216                         data += len,
217                         len  %= sizeof(c->u),
218                         data -= len;
219                 }
220
221         if (len != 0)   memcpy (p,data,len), c->num = (int)len;
222
223         return 1;
224         }
225
226 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
227 {   return SHA512_Update (c,data,len);   }
228
229 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
230 {   sha512_block_data_order (c,data,1);  }
231
232 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
233         {
234         SHA512_CTX c;
235         static unsigned char m[SHA384_DIGEST_LENGTH];
236
237         if (md == NULL) md=m;
238         SHA384_Init(&c);
239         SHA512_Update(&c,d,n);
240         SHA512_Final(md,&c);
241         OPENSSL_cleanse(&c,sizeof(c));
242         return(md);
243         }
244
245 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
246         {
247         SHA512_CTX c;
248         static unsigned char m[SHA512_DIGEST_LENGTH];
249
250         if (md == NULL) md=m;
251         SHA512_Init(&c);
252         SHA512_Update(&c,d,n);
253         SHA512_Final(md,&c);
254         OPENSSL_cleanse(&c,sizeof(c));
255         return(md);
256         }
257
258 #ifndef SHA512_ASM
259 static const SHA_LONG64 K512[80] = {
260         U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
261         U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
262         U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
263         U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
264         U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
265         U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
266         U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
267         U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
268         U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
269         U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
270         U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
271         U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
272         U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
273         U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
274         U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
275         U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
276         U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
277         U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
278         U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
279         U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
280         U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
281         U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
282         U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
283         U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
284         U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
285         U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
286         U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
287         U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
288         U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
289         U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
290         U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
291         U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
292         U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
293         U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
294         U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
295         U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
296         U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
297         U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
298         U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
299         U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
300
/*
 * Optional platform-specific definitions of two helpers:
 *
 *   ROTR(a,n)  - rotate the 64-bit value a right by n bits;
 *   PULL64(x)  - load the 64-bit big-endian value stored at &x.
 *
 * Everything in this section is skipped when PEDANTIC is defined.
 * Whichever of the two names is still undefined after this section is
 * given a portable definition further down the file.
 */
301 #ifndef PEDANTIC
/* GCC-style inline assembler, unless inline asm has been disabled. */
302 # if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
303 #  if defined(__x86_64) || defined(__x86_64__)
/* x86_64: rotate with a single rorq; n must satisfy the "J" constraint. */
304 #   define ROTR(a,n)    ({ unsigned long ret;           \
305                                 asm ("rorq %1,%0"       \
306                                 : "=r"(ret)             \
307                                 : "J"(n),"0"(a)         \
308                                 : "cc"); ret;           })
309 #   if !defined(B_ENDIAN)
/* x86_64: read the word as-is, then byte-reverse it with bswapq. */
310 #    define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
311                                 asm ("bswapq    %0"             \
312                                 : "=r"(ret)                     \
313                                 : "0"(ret)); ret;               })
314 #   endif
315 #  elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
316 #   if defined(I386_ONLY)
/*
 * i386 with I386_ONLY: byte-reverse each 32-bit half using xchgb/roll
 * instead of bswap; p[0] becomes the high half, p[1] the low half.
 */
317 #    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
318                         unsigned int hi,lo;                     \
319                                 asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
320                                     "roll $16,%%eax; roll $16,%%edx; "\
321                                     "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
322                                 : "=a"(lo),"=d"(hi)             \
323                                 : "0"(p[1]),"1"(p[0]) : "cc");  \
324                                 ((SHA_LONG64)hi)<<32|lo;        })
325 #   else
/* i386: byte-reverse each 32-bit half with bswapl; p[0] is the high half. */
326 #    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
327                         unsigned int hi,lo;                     \
328                                 asm ("bswapl %0; bswapl %1;"    \
329                                 : "=r"(lo),"=r"(hi)             \
330                                 : "0"(p[1]),"1"(p[0]));         \
331                                 ((SHA_LONG64)hi)<<32|lo;        })
332 #   endif
333 #  elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
/* 64-bit PowerPC: rotate with rotrdi; only ROTR is provided here. */
334 #   define ROTR(a,n)    ({ unsigned long ret;           \
335                                 asm ("rotrdi %0,%1,%2"  \
336                                 : "=r"(ret)             \
337                                 : "r"(a),"K"(n)); ret;  })
338 #  endif
339 # elif defined(_MSC_VER)
340 #  if defined(_WIN64)   /* applies to both IA-64 and AMD64 */
341 #   define ROTR(a,n)    _rotr64((a),n)
342 #  endif
343 #  if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
/*
 * 32-bit MSVC: helper that loads the two 32-bit halves at the address
 * held in ECX and byte-reverses each of them.  The parameter x is not
 * referenced by name and there is no return statement.
 * NOTE(review): presumably __fastcall places x in ECX and the 64-bit
 * result is returned in EDX:EAX - confirm against the MSVC calling
 * convention documentation.
 */
344 #   if defined(I386_ONLY)
345     static SHA_LONG64 __fastcall __pull64be(const void *x)
346     {   _asm    mov     edx, [ecx + 0]
347         _asm    mov     eax, [ecx + 4]
348         _asm    xchg    dh,dl
349         _asm    xchg    ah,al
350         _asm    rol     edx,16
351         _asm    rol     eax,16
352         _asm    xchg    dh,dl
353         _asm    xchg    ah,al
354     }
355 #   else
356     static SHA_LONG64 __fastcall __pull64be(const void *x)
357     {   _asm    mov     edx, [ecx + 0]
358         _asm    mov     eax, [ecx + 4]
359         _asm    bswap   edx
360         _asm    bswap   eax
361     }
362 #   endif
363 #   define PULL64(x) __pull64be(&(x))
364 #  endif
365 # endif
366 #endif
367
#ifndef PULL64
/*
 * Portable fallback, used when no platform-specific PULL64 was defined.
 *
 * B(x,j)     reads byte j (0..7) at the address of x and shifts it to
 *            its big-endian position in a 64-bit word: byte 0 lands in
 *            bits 63..56, byte 7 in bits 7..0.
 * PULL64(x)  ORs the eight bytes together, giving the 64-bit big-endian
 *            value stored at &x irrespective of host byte order or
 *            alignment.
 *
 * Both parameters are fully parenthesized so that expression arguments
 * (for example B(w, k+1)) expand with the intended precedence.  x is
 * evaluated eight times by PULL64, so it must be free of side effects.
 */
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#endif
372
#ifndef ROTR
/*
 * Portable fallback: rotate the 64-bit unsigned value x right by s bits.
 *
 * s must be in the range 1..63; s == 0 would make the left shift count
 * 64, which is undefined for a 64-bit operand.  x is evaluated twice,
 * so it must be free of side effects.  Both parameters and both shift
 * terms are parenthesized so that expression arguments such as
 * ROTR(v, a+b) expand with the intended precedence.
 */
#define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
#endif
376
/*
 * Round functions on 64-bit words, all built from ROTR, shifts and
 * bitwise operators.  Arguments are evaluated more than once, so they
 * must be free of side effects.
 *
 *   Sigma0/Sigma1  XOR of three right-rotations of x; used on the
 *                  working variables a and e in every round.
 *   sigma0/sigma1  XOR of two right-rotations and one right shift of x;
 *                  used to extend the message schedule.
 */
#define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
#define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
#define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

/* Ch: each result bit is taken from y where x is 1 and from z where x is 0. */
#define Ch(x,y,z)       ((z) ^ ((x) & ((y) ^ (z))))
/* Maj: each result bit is the majority vote of the bits of x, y and z. */
#define Maj(x,y,z)      (((x) & (y)) | ((z) & ((x) | (y))))
384
#if defined(OPENSSL_IA32_SSE2) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY)
/*
 * Dispatch to the external routine sha512_block_sse2() when bit 26 of
 * OPENSSL_ia32cap_P is set (presumably the SSE2 capability flag -
 * confirm against the definition of OPENSSL_ia32cap_P).
 *
 * On dispatch the macro executes "return;", so it may only be used as
 * a statement inside a function returning void, before any other work
 * is done.  When the bit is clear the macro does nothing and execution
 * continues with the statement after it.
 *
 * The parameters are parenthesized so that expression arguments expand
 * with the intended precedence.
 */
#define GO_FOR_SSE2(ctx,in,num)         do {            \
        void    sha512_block_sse2(void *,const void *,size_t);  \
        if (!(OPENSSL_ia32cap_P & (1<<26))) break;      \
        sha512_block_sse2((ctx)->h,(in),(num)); return; \
                                        } while (0)
#endif
392
393 #ifdef OPENSSL_SMALL_FOOTPRINT
394
395 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
396         {
397         const SHA_LONG64 *W=in;
398         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1,T2;
399         SHA_LONG64      X[16];
400         int i;
401
402 #ifdef GO_FOR_SSE2
403         GO_FOR_SSE2(ctx,in,num);
404 #endif
405
406                         while (num--) {
407
408         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
409         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
410
411         for (i=0;i<16;i++)
412                 {
413 #ifdef B_ENDIAN
414                 T1 = X[i] = W[i];
415 #else
416                 T1 = X[i] = PULL64(W[i]);
417 #endif
418                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
419                 T2 = Sigma0(a) + Maj(a,b,c);
420                 h = g;  g = f;  f = e;  e = d + T1;
421                 d = c;  c = b;  b = a;  a = T1 + T2;
422                 }
423
424         for (;i<80;i++)
425                 {
426                 s0 = X[(i+1)&0x0f];     s0 = sigma0(s0);
427                 s1 = X[(i+14)&0x0f];    s1 = sigma1(s1);
428
429                 T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
430                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
431                 T2 = Sigma0(a) + Maj(a,b,c);
432                 h = g;  g = f;  f = e;  e = d + T1;
433                 d = c;  c = b;  b = a;  a = T1 + T2;
434                 }
435
436         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
437         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
438
439                         W+=SHA_LBLOCK;
440                         }
441         }
442
443 #else
444
/*
 * One round of the unrolled compression function.
 *
 * Both macros rely on variables named T1 (and, for ROUND_16_80, s0 and
 * s1) being declared in the calling scope.  Instead of moving values
 * between the eight working variables, callers rotate the order of the
 * names a..h from one invocation to the next.
 *
 * ROUND_00_15: on entry T1 holds the schedule word for round i; on exit
 *              d has been advanced by the round sum and h holds the new
 *              leading working value.
 */
#define ROUND_00_15(i,a,b,c,d,e,f,g,h)          do {    \
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
        d += T1;                                        \
        h = T1 + Sigma0(a) + Maj(a,b,c);                \
                                                } while (0)

/*
 * ROUND_16_80: first extends the message schedule in the 16-word
 *              window X for round i, leaving the new word in T1, then
 *              performs the same round step as ROUND_00_15.
 */
#define ROUND_16_80(i,a,b,c,d,e,f,g,h,X)        do {    \
        s0 = X[(i+1)&0x0f];                             \
        s1 = X[(i+14)&0x0f];                            \
        s0 = sigma0(s0);                                \
        s1 = sigma1(s1);                                \
        X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f];         \
        T1 = X[(i)&0x0f];                               \
        ROUND_00_15(i,a,b,c,d,e,f,g,h);                 \
                                                } while (0)
455
456 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
457         {
458         const SHA_LONG64 *W=in;
459         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1;
460         SHA_LONG64      X[16];
461         int i;
462
463 #ifdef GO_FOR_SSE2
464         GO_FOR_SSE2(ctx,in,num);
465 #endif
466
467                         while (num--) {
468
469         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
470         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
471
472 #ifdef B_ENDIAN
473         T1 = X[0] = W[0];       ROUND_00_15(0,a,b,c,d,e,f,g,h);
474         T1 = X[1] = W[1];       ROUND_00_15(1,h,a,b,c,d,e,f,g);
475         T1 = X[2] = W[2];       ROUND_00_15(2,g,h,a,b,c,d,e,f);
476         T1 = X[3] = W[3];       ROUND_00_15(3,f,g,h,a,b,c,d,e);
477         T1 = X[4] = W[4];       ROUND_00_15(4,e,f,g,h,a,b,c,d);
478         T1 = X[5] = W[5];       ROUND_00_15(5,d,e,f,g,h,a,b,c);
479         T1 = X[6] = W[6];       ROUND_00_15(6,c,d,e,f,g,h,a,b);
480         T1 = X[7] = W[7];       ROUND_00_15(7,b,c,d,e,f,g,h,a);
481         T1 = X[8] = W[8];       ROUND_00_15(8,a,b,c,d,e,f,g,h);
482         T1 = X[9] = W[9];       ROUND_00_15(9,h,a,b,c,d,e,f,g);
483         T1 = X[10] = W[10];     ROUND_00_15(10,g,h,a,b,c,d,e,f);
484         T1 = X[11] = W[11];     ROUND_00_15(11,f,g,h,a,b,c,d,e);
485         T1 = X[12] = W[12];     ROUND_00_15(12,e,f,g,h,a,b,c,d);
486         T1 = X[13] = W[13];     ROUND_00_15(13,d,e,f,g,h,a,b,c);
487         T1 = X[14] = W[14];     ROUND_00_15(14,c,d,e,f,g,h,a,b);
488         T1 = X[15] = W[15];     ROUND_00_15(15,b,c,d,e,f,g,h,a);
489 #else
490         T1 = X[0]  = PULL64(W[0]);      ROUND_00_15(0,a,b,c,d,e,f,g,h);
491         T1 = X[1]  = PULL64(W[1]);      ROUND_00_15(1,h,a,b,c,d,e,f,g);
492         T1 = X[2]  = PULL64(W[2]);      ROUND_00_15(2,g,h,a,b,c,d,e,f);
493         T1 = X[3]  = PULL64(W[3]);      ROUND_00_15(3,f,g,h,a,b,c,d,e);
494         T1 = X[4]  = PULL64(W[4]);      ROUND_00_15(4,e,f,g,h,a,b,c,d);
495         T1 = X[5]  = PULL64(W[5]);      ROUND_00_15(5,d,e,f,g,h,a,b,c);
496         T1 = X[6]  = PULL64(W[6]);      ROUND_00_15(6,c,d,e,f,g,h,a,b);
497         T1 = X[7]  = PULL64(W[7]);      ROUND_00_15(7,b,c,d,e,f,g,h,a);
498         T1 = X[8]  = PULL64(W[8]);      ROUND_00_15(8,a,b,c,d,e,f,g,h);
499         T1 = X[9]  = PULL64(W[9]);      ROUND_00_15(9,h,a,b,c,d,e,f,g);
500         T1 = X[10] = PULL64(W[10]);     ROUND_00_15(10,g,h,a,b,c,d,e,f);
501         T1 = X[11] = PULL64(W[11]);     ROUND_00_15(11,f,g,h,a,b,c,d,e);
502         T1 = X[12] = PULL64(W[12]);     ROUND_00_15(12,e,f,g,h,a,b,c,d);
503         T1 = X[13] = PULL64(W[13]);     ROUND_00_15(13,d,e,f,g,h,a,b,c);
504         T1 = X[14] = PULL64(W[14]);     ROUND_00_15(14,c,d,e,f,g,h,a,b);
505         T1 = X[15] = PULL64(W[15]);     ROUND_00_15(15,b,c,d,e,f,g,h,a);
506 #endif
507
508         for (i=16;i<80;i+=8)
509                 {
510                 ROUND_16_80(i+0,a,b,c,d,e,f,g,h,X);
511                 ROUND_16_80(i+1,h,a,b,c,d,e,f,g,X);
512                 ROUND_16_80(i+2,g,h,a,b,c,d,e,f,X);
513                 ROUND_16_80(i+3,f,g,h,a,b,c,d,e,X);
514                 ROUND_16_80(i+4,e,f,g,h,a,b,c,d,X);
515                 ROUND_16_80(i+5,d,e,f,g,h,a,b,c,X);
516                 ROUND_16_80(i+6,c,d,e,f,g,h,a,b,X);
517                 ROUND_16_80(i+7,b,c,d,e,f,g,h,a,X);
518                 }
519
520         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
521         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
522
523                         W+=SHA_LBLOCK;
524                         }
525         }
526
527 #endif
528
529 #endif /* SHA512_ASM */
530
531 #endif /* OPENSSL_NO_SHA512 */