/*
 * Copyright 2004-2020 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */
/*
 * SHA512 low level APIs are deprecated for public use, but still ok for
 * internal use.
 */
14 #include "internal/deprecated.h"
17 #include <openssl/opensslconf.h>
/*-
 * IMPLEMENTATION NOTES.
 *
 * As you might have noticed 32-bit hash algorithms:
 *
 * - permit SHA_LONG to be wider than 32-bit
 * - optimized versions implement two transform functions: one operating
 *   on [aligned] data in host byte order and one - on data in input
 *   stream byte order;
 * - share common byte-order neutral collector and padding function
 *   implementations, ../md32_common.h;
 *
 * Neither of the above applies to this SHA-512 implementation. Reasons
 * [in reverse order] are:
 *
 * - it's the only 64-bit hash algorithm for the moment of this writing,
 *   there is no need for common collector/padding implementation [yet];
 * - by supporting only one transform function [which operates on
 *   *aligned* data in input stream byte order, big-endian in this case]
 *   we minimize burden of maintenance in two ways: a) collector/padding
 *   function is simpler; b) only one transform function to stare at;
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
 *   apply a number of optimizations to mitigate potential performance
 *   penalties caused by previous design decision;
 *
 * Caveat lector.
 *
 * Implementation relies on the fact that "long long" is 64-bit on
 * both 32- and 64-bit platforms. If some compiler vendor comes up
 * with 128-bit long long, adjustment to sha.h would be required.
 * As this implementation relies on 64-bit integer type, it's totally
 * inappropriate for platforms which don't support it, most notably
 * 16-bit platforms.
 */
#include <string.h>

#include <openssl/crypto.h>
#include <openssl/sha.h>
#include <openssl/opensslv.h>

#include "internal/cryptlib.h"
#include "crypto/sha.h"
/*
 * Targets on which sha512_block_data_order() may be handed input that is
 * not aligned to a 64-bit boundary.  When this macro is not defined the
 * callers bounce unaligned input through the context buffer first.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
    defined(__s390__) || defined(__s390x__) || \
    defined(__aarch64__) || \
    defined(SHA512_ASM)
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif
/*
 * U64(C) appends the suffix that makes the integer constant C an unsigned
 * 64-bit literal for the compiler in use.
 */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
# define U64(C)     C##UI64
#elif defined(__arch64__)
# define U64(C)     C##UL
#else
# define U64(C)     C##ULL
#endif
78 int sha512_224_init(SHA512_CTX *c)
80 c->h[0] = U64(0x8c3d37c819544da2);
81 c->h[1] = U64(0x73e1996689dcd4d6);
82 c->h[2] = U64(0x1dfab7ae32ff9c82);
83 c->h[3] = U64(0x679dd514582f9fcf);
84 c->h[4] = U64(0x0f6d2b697bd44da8);
85 c->h[5] = U64(0x77e36f7304c48942);
86 c->h[6] = U64(0x3f9d85a86a1d36c8);
87 c->h[7] = U64(0x1112e6ad91d692a1);
92 c->md_len = SHA224_DIGEST_LENGTH;
96 int sha512_256_init(SHA512_CTX *c)
98 c->h[0] = U64(0x22312194fc2bf72c);
99 c->h[1] = U64(0x9f555fa3c84c64c2);
100 c->h[2] = U64(0x2393b86b6f53b151);
101 c->h[3] = U64(0x963877195940eabd);
102 c->h[4] = U64(0x96283ee2a88effe3);
103 c->h[5] = U64(0xbe5e1e2553863992);
104 c->h[6] = U64(0x2b0199fc2c85b8aa);
105 c->h[7] = U64(0x0eb72ddc81c52ca2);
110 c->md_len = SHA256_DIGEST_LENGTH;
114 int SHA384_Init(SHA512_CTX *c)
116 c->h[0] = U64(0xcbbb9d5dc1059ed8);
117 c->h[1] = U64(0x629a292a367cd507);
118 c->h[2] = U64(0x9159015a3070dd17);
119 c->h[3] = U64(0x152fecd8f70e5939);
120 c->h[4] = U64(0x67332667ffc00b31);
121 c->h[5] = U64(0x8eb44a8768581511);
122 c->h[6] = U64(0xdb0c2e0d64f98fa7);
123 c->h[7] = U64(0x47b5481dbefa4fa4);
128 c->md_len = SHA384_DIGEST_LENGTH;
132 int SHA512_Init(SHA512_CTX *c)
134 c->h[0] = U64(0x6a09e667f3bcc908);
135 c->h[1] = U64(0xbb67ae8584caa73b);
136 c->h[2] = U64(0x3c6ef372fe94f82b);
137 c->h[3] = U64(0xa54ff53a5f1d36f1);
138 c->h[4] = U64(0x510e527fade682d1);
139 c->h[5] = U64(0x9b05688c2b3e6c1f);
140 c->h[6] = U64(0x1f83d9abfb41bd6b);
141 c->h[7] = U64(0x5be0cd19137e2179);
146 c->md_len = SHA512_DIGEST_LENGTH;
153 void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
155 int SHA512_Final(unsigned char *md, SHA512_CTX *c)
157 unsigned char *p = (unsigned char *)c->u.p;
160 p[n] = 0x80; /* There always is a room for one */
162 if (n > (sizeof(c->u) - 16)) {
163 memset(p + n, 0, sizeof(c->u) - n);
165 sha512_block_data_order(c, p, 1);
168 memset(p + n, 0, sizeof(c->u) - 16 - n);
170 c->u.d[SHA_LBLOCK - 2] = c->Nh;
171 c->u.d[SHA_LBLOCK - 1] = c->Nl;
173 p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
174 p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
175 p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
176 p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
177 p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
178 p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
179 p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
180 p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
181 p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
182 p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
183 p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
184 p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
185 p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
186 p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
187 p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
188 p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
191 sha512_block_data_order(c, p, 1);
197 /* Let compiler decide if it's appropriate to unroll... */
198 case SHA224_DIGEST_LENGTH:
199 for (n = 0; n < SHA224_DIGEST_LENGTH / 8; n++) {
200 SHA_LONG64 t = c->h[n];
202 *(md++) = (unsigned char)(t >> 56);
203 *(md++) = (unsigned char)(t >> 48);
204 *(md++) = (unsigned char)(t >> 40);
205 *(md++) = (unsigned char)(t >> 32);
206 *(md++) = (unsigned char)(t >> 24);
207 *(md++) = (unsigned char)(t >> 16);
208 *(md++) = (unsigned char)(t >> 8);
209 *(md++) = (unsigned char)(t);
212 * For 224 bits, there are four bytes left over that have to be
213 * processed separately.
216 SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];
218 *(md++) = (unsigned char)(t >> 56);
219 *(md++) = (unsigned char)(t >> 48);
220 *(md++) = (unsigned char)(t >> 40);
221 *(md++) = (unsigned char)(t >> 32);
224 case SHA256_DIGEST_LENGTH:
225 for (n = 0; n < SHA256_DIGEST_LENGTH / 8; n++) {
226 SHA_LONG64 t = c->h[n];
228 *(md++) = (unsigned char)(t >> 56);
229 *(md++) = (unsigned char)(t >> 48);
230 *(md++) = (unsigned char)(t >> 40);
231 *(md++) = (unsigned char)(t >> 32);
232 *(md++) = (unsigned char)(t >> 24);
233 *(md++) = (unsigned char)(t >> 16);
234 *(md++) = (unsigned char)(t >> 8);
235 *(md++) = (unsigned char)(t);
238 case SHA384_DIGEST_LENGTH:
239 for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
240 SHA_LONG64 t = c->h[n];
242 *(md++) = (unsigned char)(t >> 56);
243 *(md++) = (unsigned char)(t >> 48);
244 *(md++) = (unsigned char)(t >> 40);
245 *(md++) = (unsigned char)(t >> 32);
246 *(md++) = (unsigned char)(t >> 24);
247 *(md++) = (unsigned char)(t >> 16);
248 *(md++) = (unsigned char)(t >> 8);
249 *(md++) = (unsigned char)(t);
252 case SHA512_DIGEST_LENGTH:
253 for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
254 SHA_LONG64 t = c->h[n];
256 *(md++) = (unsigned char)(t >> 56);
257 *(md++) = (unsigned char)(t >> 48);
258 *(md++) = (unsigned char)(t >> 40);
259 *(md++) = (unsigned char)(t >> 32);
260 *(md++) = (unsigned char)(t >> 24);
261 *(md++) = (unsigned char)(t >> 16);
262 *(md++) = (unsigned char)(t >> 8);
263 *(md++) = (unsigned char)(t);
266 /* ... as well as make sure md_len is not abused. */
274 int SHA384_Final(unsigned char *md, SHA512_CTX *c)
276 return SHA512_Final(md, c);
279 int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
282 unsigned char *p = c->u.p;
283 const unsigned char *data = (const unsigned char *)_data;
288 l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
291 if (sizeof(len) >= 8)
292 c->Nh += (((SHA_LONG64) len) >> 61);
296 size_t n = sizeof(c->u) - c->num;
299 memcpy(p + c->num, data, len), c->num += (unsigned int)len;
302 memcpy(p + c->num, data, n), c->num = 0;
304 sha512_block_data_order(c, p, 1);
308 if (len >= sizeof(c->u)) {
309 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
310 if ((size_t)data % sizeof(c->u.d[0]) != 0)
311 while (len >= sizeof(c->u))
312 memcpy(p, data, sizeof(c->u)),
313 sha512_block_data_order(c, p, 1),
314 len -= sizeof(c->u), data += sizeof(c->u);
317 sha512_block_data_order(c, data, len / sizeof(c->u)),
318 data += len, len %= sizeof(c->u), data -= len;
322 memcpy(p, data, len), c->num = (int)len;
327 int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
329 return SHA512_Update(c, data, len);
332 void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
334 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
335 if ((size_t)data % sizeof(c->u.d[0]) != 0)
336 memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
338 sha512_block_data_order(c, data, 1);
341 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
344 static unsigned char m[SHA384_DIGEST_LENGTH];
349 SHA512_Update(&c, d, n);
350 SHA512_Final(md, &c);
351 OPENSSL_cleanse(&c, sizeof(c));
355 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
358 static unsigned char m[SHA512_DIGEST_LENGTH];
363 SHA512_Update(&c, d, n);
364 SHA512_Final(md, &c);
365 OPENSSL_cleanse(&c, sizeof(c));
370 static const SHA_LONG64 K512[80] = {
371 U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
372 U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
373 U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
374 U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
375 U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
376 U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
377 U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
378 U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
379 U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
380 U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
381 U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
382 U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
383 U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
384 U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
385 U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
386 U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
387 U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
388 U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
389 U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
390 U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
391 U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
392 U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
393 U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
394 U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
395 U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
396 U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
397 U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
398 U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
399 U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
400 U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
401 U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
402 U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
403 U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
404 U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
405 U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
406 U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
407 U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
408 U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
409 U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
410 U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
/*
 * Compiler- and CPU-specific versions of two primitives:
 *   ROTR(a,n)  - rotate the 64-bit value a right by the constant n;
 *   PULL64(x)  - load the 64-bit big-endian word stored at x into host order.
 * Whatever is left undefined here is expected to be supplied by a portable
 * definition later in the file.  Defining PEDANTIC disables all of this.
 */
# ifndef PEDANTIC
#  if defined(__GNUC__) && __GNUC__>=2 && \
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#   if defined(__x86_64) || defined(__x86_64__)
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("rorq %1,%0"       \
                                : "=r"(ret)             \
                                : "J"(n),"0"(a)         \
                                : "cc"); ret;           })
#    if !defined(B_ENDIAN)
#     define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
                                asm ("bswapq    %0"             \
                                : "=r"(ret)                     \
                                : "0"(ret)); ret;               })
#    endif
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#    if defined(I386_ONLY)
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                          unsigned int hi=p[0],lo=p[1];          \
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                    "roll $16,%%eax; roll $16,%%edx; "\
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                : "=a"(lo),"=d"(hi)             \
                                : "0"(lo),"1"(hi) : "cc");      \
                                ((SHA_LONG64)hi)<<32|lo;        })
#    else
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                          unsigned int hi=p[0],lo=p[1];         \
                                asm ("bswapl %0; bswapl %1;"    \
                                : "=r"(lo),"=r"(hi)             \
                                : "0"(lo),"1"(hi));             \
                                ((SHA_LONG64)hi)<<32|lo;        })
#    endif
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("rotrdi %0,%1,%2"  \
                                : "=r"(ret)             \
                                : "r"(a),"K"(n)); ret;  })
#   elif defined(__aarch64__)
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("ror %0,%1,%2"     \
                                : "=r"(ret)             \
                                : "r"(a),"I"(n)); ret;  })
#    if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
#     define PULL64(x)   ({ SHA_LONG64 ret;                     \
                                asm ("rev       %0,%1"          \
                                : "=r"(ret)                     \
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
#    endif
#   endif
#  elif defined(_MSC_VER)
#   if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
#    pragma intrinsic(_rotr64)
#    define ROTR(a,n)    _rotr64((a),n)
#   endif
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
       !defined(OPENSSL_NO_INLINE_ASM)
/*
 * 32-bit MSVC: __fastcall passes x in ecx; the byte-swapped result is left
 * in edx:eax, so there is deliberately no C return statement.
 */
#    if defined(I386_ONLY)
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov  edx,[ecx + 0]
    _asm mov  eax,[ecx + 4]
    _asm xchg dh, dl
    _asm xchg ah, al
    _asm rol  edx, 16
    _asm rol  eax, 16
    _asm xchg dh, dl
    _asm xchg ah, al
}
#    else
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov  edx,[ecx + 0]
    _asm mov  eax,[ecx + 4]
    _asm bswap edx
    _asm bswap eax
}
#    endif
#    define PULL64(x) __pull64be(&(x))
#   endif
#  endif
# endif
/*
 * Portable fallbacks, used only when no platform-specific definition of
 * PULL64 / ROTR is already in place.
 *
 * B(x,j)    - byte j of the object x, moved to its big-endian position
 *             within a 64-bit word.
 * PULL64(x) - the 64-bit big-endian word stored in x, in host order.
 * ROTR(x,s) - x rotated right by s bits; x must be an unsigned 64-bit
 *             value and 0 < s < 64.
 */
# ifndef PULL64
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
#  define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
# endif
/* Mixing functions used by the SHA-512 family block routines. */
# define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
# define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
# define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
# define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
# define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
510 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
512 * This code should give better results on 32-bit CPU with less than
513 * ~24 registers, both size and performance wise...
516 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
519 const SHA_LONG64 *W = in;
521 SHA_LONG64 X[9 + 80], *F;
536 for (i = 0; i < 16; i++, F--) {
545 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
547 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
550 for (; i < 80; i++, F--) {
551 T = sigma0(F[8 + 16 - 1]);
552 T += sigma1(F[8 + 16 - 14]);
553 T += F[8 + 16] + F[8 + 16 - 9];
558 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
560 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
576 # elif defined(OPENSSL_SMALL_FOOTPRINT)
578 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
581 const SHA_LONG64 *W = in;
582 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
597 for (i = 0; i < 16; i++) {
601 T1 = X[i] = PULL64(W[i]);
603 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
604 T2 = Sigma0(a) + Maj(a, b, c);
615 for (; i < 80; i++) {
616 s0 = X[(i + 1) & 0x0f];
618 s1 = X[(i + 14) & 0x0f];
621 T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
622 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
623 T2 = Sigma0(a) + Maj(a, b, c);
648 # define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
649 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
650 h = Sigma0(a) + Maj(a,b,c); \
651 d += T1; h += T1; } while (0)
653 # define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) do { \
654 s0 = X[(j+1)&0x0f]; s0 = sigma0(s0); \
655 s1 = X[(j+14)&0x0f]; s1 = sigma1(s1); \
656 T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f]; \
657 ROUND_00_15(i+j,a,b,c,d,e,f,g,h); } while (0)
659 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
662 const SHA_LONG64 *W = in;
663 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
680 ROUND_00_15(0, a, b, c, d, e, f, g, h);
682 ROUND_00_15(1, h, a, b, c, d, e, f, g);
684 ROUND_00_15(2, g, h, a, b, c, d, e, f);
686 ROUND_00_15(3, f, g, h, a, b, c, d, e);
688 ROUND_00_15(4, e, f, g, h, a, b, c, d);
690 ROUND_00_15(5, d, e, f, g, h, a, b, c);
692 ROUND_00_15(6, c, d, e, f, g, h, a, b);
694 ROUND_00_15(7, b, c, d, e, f, g, h, a);
696 ROUND_00_15(8, a, b, c, d, e, f, g, h);
698 ROUND_00_15(9, h, a, b, c, d, e, f, g);
700 ROUND_00_15(10, g, h, a, b, c, d, e, f);
702 ROUND_00_15(11, f, g, h, a, b, c, d, e);
704 ROUND_00_15(12, e, f, g, h, a, b, c, d);
706 ROUND_00_15(13, d, e, f, g, h, a, b, c);
708 ROUND_00_15(14, c, d, e, f, g, h, a, b);
710 ROUND_00_15(15, b, c, d, e, f, g, h, a);
712 T1 = X[0] = PULL64(W[0]);
713 ROUND_00_15(0, a, b, c, d, e, f, g, h);
714 T1 = X[1] = PULL64(W[1]);
715 ROUND_00_15(1, h, a, b, c, d, e, f, g);
716 T1 = X[2] = PULL64(W[2]);
717 ROUND_00_15(2, g, h, a, b, c, d, e, f);
718 T1 = X[3] = PULL64(W[3]);
719 ROUND_00_15(3, f, g, h, a, b, c, d, e);
720 T1 = X[4] = PULL64(W[4]);
721 ROUND_00_15(4, e, f, g, h, a, b, c, d);
722 T1 = X[5] = PULL64(W[5]);
723 ROUND_00_15(5, d, e, f, g, h, a, b, c);
724 T1 = X[6] = PULL64(W[6]);
725 ROUND_00_15(6, c, d, e, f, g, h, a, b);
726 T1 = X[7] = PULL64(W[7]);
727 ROUND_00_15(7, b, c, d, e, f, g, h, a);
728 T1 = X[8] = PULL64(W[8]);
729 ROUND_00_15(8, a, b, c, d, e, f, g, h);
730 T1 = X[9] = PULL64(W[9]);
731 ROUND_00_15(9, h, a, b, c, d, e, f, g);
732 T1 = X[10] = PULL64(W[10]);
733 ROUND_00_15(10, g, h, a, b, c, d, e, f);
734 T1 = X[11] = PULL64(W[11]);
735 ROUND_00_15(11, f, g, h, a, b, c, d, e);
736 T1 = X[12] = PULL64(W[12]);
737 ROUND_00_15(12, e, f, g, h, a, b, c, d);
738 T1 = X[13] = PULL64(W[13]);
739 ROUND_00_15(13, d, e, f, g, h, a, b, c);
740 T1 = X[14] = PULL64(W[14]);
741 ROUND_00_15(14, c, d, e, f, g, h, a, b);
742 T1 = X[15] = PULL64(W[15]);
743 ROUND_00_15(15, b, c, d, e, f, g, h, a);
746 for (i = 16; i < 80; i += 16) {
747 ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
748 ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
749 ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
750 ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
751 ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
752 ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
753 ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
754 ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
755 ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
756 ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
757 ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
758 ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
759 ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
760 ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
761 ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
762 ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
780 #endif /* SHA512_ASM */