2 * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
11 * SHA512 low level APIs are deprecated for public use, but still ok for
14 #include "internal/deprecated.h"
17 #include <openssl/opensslconf.h>
19 * IMPLEMENTATION NOTES.
21 * As you might have noticed 32-bit hash algorithms:
23 * - permit SHA_LONG to be wider than 32-bit
24 * - optimized versions implement two transform functions: one operating
25 * on [aligned] data in host byte order and one - on data in input
27 * - share common byte-order neutral collector and padding function
28 * implementations, crypto/md32_common.h;
30 * Neither of the above applies to this SHA-512 implementation. Reasons
31 * [in reverse order] are:
33 * - it's the only 64-bit hash algorithm at the time of this writing,
34 * there is no need for common collector/padding implementation [yet];
35 * - by supporting only one transform function [which operates on
36 * *aligned* data in input stream byte order, big-endian in this case]
37 * we minimize burden of maintenance in two ways: a) collector/padding
38 * function is simpler; b) only one transform function to stare at;
39 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
40 * apply a number of optimizations to mitigate potential performance
41 * penalties caused by previous design decision;
45 * Implementation relies on the fact that "long long" is 64-bit on
46 * both 32- and 64-bit platforms. If some compiler vendor comes up
47 * with 128-bit long long, adjustment to sha.h would be required.
48 * As this implementation relies on 64-bit integer type, it's totally
49 * inappropriate for platforms which don't support it, most notably
55 #include <openssl/crypto.h>
56 #include <openssl/sha.h>
57 #include <openssl/opensslv.h>
59 #include "internal/cryptlib.h"
60 #include "crypto/sha.h"
/*
 * Defines SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA for the targets tested
 * below (x86 / x86-64, s390 / s390x and aarch64 predefined macros are
 * visible; part of the condition is not shown in this listing).
 * SHA512_Update and SHA512_Transform test the macro with #ifndef: when it
 * is absent, input that is not aligned to sizeof(c->u.d[0]) is copied into
 * the context buffer before the block function is called.
 */
62 #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
63 defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
64 defined(__s390__) || defined(__s390x__) || \
65 defined(__aarch64__) || \
67 # define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
/*
 * U64(C) pastes an unsigned 64-bit integer-literal suffix onto C so the
 * hash constants below are typed consistently across compilers: UI64 when
 * _WIN32 or _WIN64 is defined and __MINGW32__ is not, ULL in another branch.
 * NOTE(review): some lines of this conditional are missing from the
 * listing, so confirm which branch each remaining definition belongs to.
 */
70 #if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
71 # define U64(C) C##UI64
72 #elif defined(__arch64__)
75 # define U64(C) C##ULL
/*
 * Prepares *c for a SHA-512/224 computation: stores eight 64-bit starting
 * values in c->h[0..7] and records SHA224_DIGEST_LENGTH in c->md_len (the
 * case labels in SHA512_Final appear to be selected by that field).
 * NOTE(review): the constants are presumably the SHA-512/224 initial hash
 * values from FIPS 180-4 -- confirm against the standard. Statements between
 * the last h[] store and the md_len store, and the return, are not visible
 * in this listing.
 */
78 int sha512_224_init(SHA512_CTX *c)
80 c->h[0] = U64(0x8c3d37c819544da2);
81 c->h[1] = U64(0x73e1996689dcd4d6);
82 c->h[2] = U64(0x1dfab7ae32ff9c82);
83 c->h[3] = U64(0x679dd514582f9fcf);
84 c->h[4] = U64(0x0f6d2b697bd44da8);
85 c->h[5] = U64(0x77e36f7304c48942);
86 c->h[6] = U64(0x3f9d85a86a1d36c8);
87 c->h[7] = U64(0x1112e6ad91d692a1);
/* Output length in bytes for this variant. */
92 c->md_len = SHA224_DIGEST_LENGTH;
/*
 * Prepares *c for a SHA-512/256 computation: stores eight 64-bit starting
 * values in c->h[0..7] and records SHA256_DIGEST_LENGTH in c->md_len.
 * NOTE(review): the constants are presumably the SHA-512/256 initial hash
 * values from FIPS 180-4 -- confirm against the standard. Statements between
 * the last h[] store and the md_len store, and the return, are not visible
 * in this listing.
 */
96 int sha512_256_init(SHA512_CTX *c)
98 c->h[0] = U64(0x22312194fc2bf72c);
99 c->h[1] = U64(0x9f555fa3c84c64c2);
100 c->h[2] = U64(0x2393b86b6f53b151);
101 c->h[3] = U64(0x963877195940eabd);
102 c->h[4] = U64(0x96283ee2a88effe3);
103 c->h[5] = U64(0xbe5e1e2553863992);
104 c->h[6] = U64(0x2b0199fc2c85b8aa);
105 c->h[7] = U64(0x0eb72ddc81c52ca2);
/* Output length in bytes for this variant. */
110 c->md_len = SHA256_DIGEST_LENGTH;
/*
 * Prepares *c for a SHA-384 computation: stores eight 64-bit starting
 * values in c->h[0..7] and records SHA384_DIGEST_LENGTH in c->md_len.
 * SHA-384 shares the SHA512_CTX type and, as the wrappers further down
 * show, the SHA-512 update and final routines.
 * NOTE(review): the constants are presumably the SHA-384 initial hash
 * values from FIPS 180-4 -- confirm against the standard. Statements between
 * the last h[] store and the md_len store, and the return, are not visible
 * in this listing.
 */
114 int SHA384_Init(SHA512_CTX *c)
116 c->h[0] = U64(0xcbbb9d5dc1059ed8);
117 c->h[1] = U64(0x629a292a367cd507);
118 c->h[2] = U64(0x9159015a3070dd17);
119 c->h[3] = U64(0x152fecd8f70e5939);
120 c->h[4] = U64(0x67332667ffc00b31);
121 c->h[5] = U64(0x8eb44a8768581511);
122 c->h[6] = U64(0xdb0c2e0d64f98fa7);
123 c->h[7] = U64(0x47b5481dbefa4fa4);
/* Output length in bytes for this variant. */
128 c->md_len = SHA384_DIGEST_LENGTH;
/*
 * Prepares *c for a SHA-512 computation: stores eight 64-bit starting
 * values in c->h[0..7] and records SHA512_DIGEST_LENGTH in c->md_len.
 * NOTE(review): the constants are presumably the SHA-512 initial hash
 * values from FIPS 180-4 -- confirm against the standard. Statements between
 * the last h[] store and the md_len store, and the return, are not visible
 * in this listing.
 */
132 int SHA512_Init(SHA512_CTX *c)
134 c->h[0] = U64(0x6a09e667f3bcc908);
135 c->h[1] = U64(0xbb67ae8584caa73b);
136 c->h[2] = U64(0x3c6ef372fe94f82b);
137 c->h[3] = U64(0xa54ff53a5f1d36f1);
138 c->h[4] = U64(0x510e527fade682d1);
139 c->h[5] = U64(0x9b05688c2b3e6c1f);
140 c->h[6] = U64(0x1f83d9abfb41bd6b);
141 c->h[7] = U64(0x5be0cd19137e2179);
/* Output length in bytes for this variant. */
146 c->md_len = SHA512_DIGEST_LENGTH;
/*
 * Block transform used by the update/final/transform routines below.
 * Call sites pass the context, a pointer to the input, and a count of
 * sizeof(c->u)-sized blocks (1, or len / sizeof(c->u)).
 * NOTE(review): the preprocessor context of this declaration is not
 * visible; the closing "#endif" comment at the end of the file mentions
 * SHA512_ASM, so the definition may come from assembly in some builds.
 */
153 void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
/*
 * Finishes the hash: pads the buffered data in c->u, appends the 128-bit
 * message length held in c->Nh:c->Nl, runs the block function on the final
 * block(s) and writes the digest to md, most significant byte of each
 * 64-bit word first.
 * NOTE(review): several lines of this function are missing from the listing
 * (declaration of n, the statement that selects among the case labels, the
 * return statements); comments below describe only the visible statements.
 */
155 int SHA512_Final(unsigned char *md, SHA512_CTX *c)
/* Byte view of the block buffer inside the context. */
157 unsigned char *p = (unsigned char *)c->u.p;
160 p[n] = 0x80; /* There always is a room for one */
/*
 * If the position is past the point where the 16-byte length field still
 * fits, zero the remainder of this block and process it; the length then
 * goes into a further block.
 */
162 if (n > (sizeof(c->u) - 16)) {
163 memset(p + n, 0, sizeof(c->u) - n);
165 sha512_block_data_order(c, p, 1);
/* Zero everything from n up to the 16-byte length field. */
168 memset(p + n, 0, sizeof(c->u) - 16 - n);
/*
 * Two encodings of the length follow: a direct store of Nh and Nl into the
 * last two 64-bit words of the buffer, and a byte-by-byte store that places
 * the most significant byte of Nh first and the least significant byte of
 * Nl last. Presumably a preprocessor conditional that is not shown (host
 * byte order) selects one of them -- confirm.
 */
170 c->u.d[SHA_LBLOCK - 2] = c->Nh;
171 c->u.d[SHA_LBLOCK - 1] = c->Nl;
173 p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
174 p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
175 p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
176 p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
177 p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
178 p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
179 p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
180 p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
181 p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
182 p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
183 p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
184 p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
185 p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
186 p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
187 p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
188 p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
/* Process the block that carries the padding and length. */
191 sha512_block_data_order(c, p, 1);
/*
 * Digest output. Each case copies DIGEST_LENGTH / 8 whole words of c->h to
 * md, high byte first; the cases differ only in the word count, plus the
 * extra half word for the 224-bit variant.
 */
197 /* Let compiler decide if it's appropriate to unroll... */
198 case SHA224_DIGEST_LENGTH:
199 for (n = 0; n < SHA224_DIGEST_LENGTH / 8; n++) {
200 SHA_LONG64 t = c->h[n];
202 *(md++) = (unsigned char)(t >> 56);
203 *(md++) = (unsigned char)(t >> 48);
204 *(md++) = (unsigned char)(t >> 40);
205 *(md++) = (unsigned char)(t >> 32);
206 *(md++) = (unsigned char)(t >> 24);
207 *(md++) = (unsigned char)(t >> 16);
208 *(md++) = (unsigned char)(t >> 8);
209 *(md++) = (unsigned char)(t);
212 * For 224 bits, there are four bytes left over that have to be
213 * processed separately.
/* Only the top four bytes of the next word are emitted. */
216 SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];
218 *(md++) = (unsigned char)(t >> 56);
219 *(md++) = (unsigned char)(t >> 48);
220 *(md++) = (unsigned char)(t >> 40);
221 *(md++) = (unsigned char)(t >> 32);
224 case SHA256_DIGEST_LENGTH:
225 for (n = 0; n < SHA256_DIGEST_LENGTH / 8; n++) {
226 SHA_LONG64 t = c->h[n];
228 *(md++) = (unsigned char)(t >> 56);
229 *(md++) = (unsigned char)(t >> 48);
230 *(md++) = (unsigned char)(t >> 40);
231 *(md++) = (unsigned char)(t >> 32);
232 *(md++) = (unsigned char)(t >> 24);
233 *(md++) = (unsigned char)(t >> 16);
234 *(md++) = (unsigned char)(t >> 8);
235 *(md++) = (unsigned char)(t);
238 case SHA384_DIGEST_LENGTH:
239 for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
240 SHA_LONG64 t = c->h[n];
242 *(md++) = (unsigned char)(t >> 56);
243 *(md++) = (unsigned char)(t >> 48);
244 *(md++) = (unsigned char)(t >> 40);
245 *(md++) = (unsigned char)(t >> 32);
246 *(md++) = (unsigned char)(t >> 24);
247 *(md++) = (unsigned char)(t >> 16);
248 *(md++) = (unsigned char)(t >> 8);
249 *(md++) = (unsigned char)(t);
252 case SHA512_DIGEST_LENGTH:
253 for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
254 SHA_LONG64 t = c->h[n];
256 *(md++) = (unsigned char)(t >> 56);
257 *(md++) = (unsigned char)(t >> 48);
258 *(md++) = (unsigned char)(t >> 40);
259 *(md++) = (unsigned char)(t >> 32);
260 *(md++) = (unsigned char)(t >> 24);
261 *(md++) = (unsigned char)(t >> 16);
262 *(md++) = (unsigned char)(t >> 8);
263 *(md++) = (unsigned char)(t);
266 /* ... as well as make sure md_len is not abused. */
/*
 * SHA-384 finalization: forwards to SHA512_Final unchanged and returns its
 * result; the digest length is taken from the context, not from this entry
 * point.
 */
274 int SHA384_Final(unsigned char *md, SHA512_CTX *c)
276 return SHA512_Final(md, c);
/*
 * Absorbs len bytes from _data into the hash state: advances the 128-bit
 * bit counter, tops up a partially filled block buffer, runs the block
 * function over whole blocks and buffers whatever is left over.
 * NOTE(review): several lines (local declarations, early returns, the
 * stores back to c->Nl, else/brace lines) are missing from this listing;
 * comments below describe only the visible statements.
 */
279 int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
/* p: the context's block buffer; data: byte cursor over the input. */
282 unsigned char *p = c->u.p;
283 const unsigned char *data = (const unsigned char *)_data;
/* New low word of the bit count: Nl + len * 8, reduced to 64 bits. */
288 l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
/*
 * When size_t is at least 8 bytes wide, the top three bits of len are lost
 * by the << 3 above, so they are added to the high word here.
 */
291 if (sizeof(len) >= 8)
292 c->Nh += (((SHA_LONG64) len) >> 61);
/* n: free space remaining in the block buffer. */
296 size_t n = sizeof(c->u) - c->num;
/* One path appends the whole input to the buffer and advances c->num... */
299 memcpy(p + c->num, data, len), c->num += (unsigned int)len;
/* ...the other fills the buffer with n bytes, resets c->num and hashes it. */
302 memcpy(p + c->num, data, n), c->num = 0;
304 sha512_block_data_order(c, p, 1);
/* At least one whole block is available in the input. */
308 if (len >= sizeof(c->u)) {
/*
 * Without unaligned-access support, misaligned input is bounced through the
 * context buffer one block at a time.
 */
309 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
310 if ((size_t)data % sizeof(c->u.d[0]) != 0)
311 while (len >= sizeof(c->u))
312 memcpy(p, data, sizeof(c->u)),
313 sha512_block_data_order(c, p, 1),
314 len -= sizeof(c->u), data += sizeof(c->u);
/*
 * Otherwise all whole blocks are hashed in place in one call. Afterwards
 * data += len; len %= block; data -= len leaves data pointing at the
 * leftover tail and len holding its size.
 */
317 sha512_block_data_order(c, data, len / sizeof(c->u)),
318 data += len, len %= sizeof(c->u), data -= len;
/* Stash the tail for the next call and remember how much is buffered. */
322 memcpy(p, data, len), c->num = (int)len;
/*
 * SHA-384 update: forwards to SHA512_Update unchanged and returns its
 * result.
 */
327 int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
329 return SHA512_Update(c, data, len);
/*
 * Runs the block function on exactly one block read from data, updating
 * the state in *c.
 */
332 void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
/*
 * Without unaligned-access support, a misaligned block is first copied into
 * c->u.p (overwriting whatever was buffered there) and hashed from there.
 */
334 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
335 if ((size_t)data % sizeof(c->u.d[0]) != 0)
336 memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
338 sha512_block_data_order(c, data, 1);
/*
 * Table of 80 64-bit constants, one per round: the C block functions below
 * add K512[i] (or K512[i + j] in the unrolled variant) into each round.
 * NOTE(review): presumably the SHA-384/SHA-512 round constants from
 * FIPS 180-4 -- confirm against the standard.
 */
342 static const SHA_LONG64 K512[80] = {
343 U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
344 U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
345 U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
346 U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
347 U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
348 U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
349 U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
350 U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
351 U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
352 U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
353 U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
354 U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
355 U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
356 U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
357 U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
358 U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
359 U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
360 U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
361 U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
362 U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
363 U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
364 U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
365 U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
366 U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
367 U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
368 U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
369 U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
370 U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
371 U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
372 U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
373 U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
374 U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
375 U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
376 U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
377 U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
378 U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
379 U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
380 U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
381 U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
382 U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
/*
 * Target-specific definitions of the primitives used by the C block
 * functions: ROTR (64-bit rotate right), PULL64 (load a 64-bit word from
 * the input), Sigma0/Sigma1/sigma0/sigma1, Ch and Maj. The GNU C branch
 * uses statement expressions with inline asm and is taken only when
 * neither OPENSSL_NO_ASM nor OPENSSL_NO_INLINE_ASM is defined; the MSVC
 * branch uses an intrinsic or _asm helpers.
 * NOTE(review): many continuation lines, #else and #endif lines of this
 * region are missing from the listing, so the nesting shown here is
 * incomplete; comments are placed only between complete definitions.
 */
386 # if defined(__GNUC__) && __GNUC__>=2 && \
387 !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
/* x86-64: asm-based ROTR, and a PULL64 for the little-endian case. */
388 # if defined(__x86_64) || defined(__x86_64__)
389 # define ROTR(a,n) ({ SHA_LONG64 ret; \
394 # if !defined(B_ENDIAN)
395 # define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
400 # elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
401 # if defined(I386_ONLY)
402 # define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
403 unsigned int hi=p[0],lo=p[1]; \
404 asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
405 "roll $16,%%eax; roll $16,%%edx; "\
406 "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
407 : "=a"(lo),"=d"(hi) \
408 : "0"(lo),"1"(hi) : "cc"); \
409 ((SHA_LONG64)hi)<<32|lo; })
/*
 * Same result as the I386_ONLY form above (two 32-bit halves byte-swapped
 * and recombined with the first half as the high word), using bswapl.
 */
411 # define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
412 unsigned int hi=p[0],lo=p[1]; \
413 asm ("bswapl %0; bswapl %1;" \
414 : "=r"(lo),"=r"(hi) \
415 : "0"(lo),"1"(hi)); \
416 ((SHA_LONG64)hi)<<32|lo; })
/* 64-bit PowerPC: rotate via the rotrdi instruction. */
418 # elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
419 # define ROTR(a,n) ({ SHA_LONG64 ret; \
420 asm ("rotrdi %0,%1,%2" \
422 : "r"(a),"K"(n)); ret; })
/* AArch64: rotate via ror; PULL64 is defined for little-endian builds. */
423 # elif defined(__aarch64__)
424 # define ROTR(a,n) ({ SHA_LONG64 ret; \
425 asm ("ror %0,%1,%2" \
427 : "r"(a),"I"(n)); ret; })
428 # if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
429 __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
430 # define PULL64(x) ({ SHA_LONG64 ret; \
433 : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
/*
 * RISC-V: definitions keyed on the __riscv_zbkb / __riscv_zbb (PULL64),
 * __riscv_zknh (Sigma/sigma) and __riscv_zbt / __riscv_zpn (Ch/Maj) macros,
 * each with separate 32-bit and 64-bit __riscv_xlen forms.
 */
435 # elif (__riscv_zbkb || __riscv_zbb) && __riscv_xlen == 32
436 # define PULL64(x) ({ SHA_LONG64 ret; \
437 unsigned int *r = (unsigned int *)(&(ret)); \
438 const unsigned int *p = (const unsigned int *)(&(x)); \
444 : "r" (p[0])); ret; })
445 # elif (__riscv_zbkb || __riscv_zbb) && __riscv_xlen == 64
446 # define PULL64(x) ({ SHA_LONG64 ret; \
451 # if __riscv_zknh && __riscv_xlen == 32
452 # define Sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
453 const unsigned int *p = (const unsigned int *)(&(x)); \
454 asm ("sha512sum0r %0, %1, %2" \
456 : "r" (p[0]), "r" (p[1])); \
457 asm ("sha512sum0r %0, %2, %1" \
459 : "r" (p[0]), "r" (p[1])); ret; })
460 # define Sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
461 const unsigned int *p = (const unsigned int *)(&(x)); \
462 asm ("sha512sum1r %0, %1, %2" \
464 : "r" (p[0]), "r" (p[1])); \
465 asm ("sha512sum1r %0, %2, %1" \
467 : "r" (p[0]), "r" (p[1])); ret; })
468 # define sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
469 const unsigned int *p = (const unsigned int *)(&(x)); \
470 asm ("sha512sig0l %0, %1, %2" \
472 : "r" (p[0]), "r" (p[1])); \
473 asm ("sha512sig0h %0, %2, %1" \
475 : "r" (p[0]), "r" (p[1])); ret; })
476 # define sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
477 const unsigned int *p = (const unsigned int *)(&(x)); \
478 asm ("sha512sig1l %0, %1, %2" \
480 : "r" (p[0]), "r" (p[1])); \
481 asm ("sha512sig1h %0, %2, %1" \
483 : "r" (p[0]), "r" (p[1])); ret; })
/* 64-bit form: one instruction per function on the whole word. */
484 # elif __riscv_zknh && __riscv_xlen == 64
485 # define Sigma0(x) ({ SHA_LONG64 ret; \
486 asm ("sha512sum0 %0, %1" \
489 # define Sigma1(x) ({ SHA_LONG64 ret; \
490 asm ("sha512sum1 %0, %1" \
493 # define sigma0(x) ({ SHA_LONG64 ret; \
494 asm ("sha512sig0 %0, %1" \
497 # define sigma1(x) ({ SHA_LONG64 ret; \
498 asm ("sha512sig1 %0, %1" \
502 # if (__riscv_zbt || __riscv_zpn) && __riscv_xlen == 32
503 # define Ch(x,y,z) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
504 const unsigned int *xp = (const unsigned int *)(&(x)); \
505 const unsigned int *yp = (const unsigned int *)(&(y)); \
506 const unsigned int *zp = (const unsigned int *)(&(z)); \
507 asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
509 : "r"(xp[0]), "r"(yp[0]), "r"(zp[0])); \
510 asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
512 : "r"(xp[1]), "r"(yp[1]), "r"(zp[1])); ret; })
513 # define Maj(x,y,z) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
514 const unsigned int *xp = (const unsigned int *)(&(x)); \
515 const unsigned int *yp = (const unsigned int *)(&(y)); \
516 const unsigned int *zp = (const unsigned int *)(&(z)); \
517 asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
519 : "r"(xp[0]^zp[0]), "r"(yp[0]), "r"(zp[0])); \
520 asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
522 : "r"(xp[1]^zp[1]), "r"(yp[1]), "r"(zp[1])); ret; })
/*
 * NOTE(review): the 64-bit Maj below passes x as its last operand where the
 * 32-bit Maj above passes z. Presumably equivalent because that operand
 * only matters in bit positions where x^z is 0, i.e. where x equals z --
 * confirm against the instruction encoded by the .insn line.
 */
523 # elif (__riscv_zbt || __riscv_zpn) && __riscv_xlen == 64
524 # define Ch(x,y,z) ({ SHA_LONG64 ret; \
525 asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
527 : "r"(x), "r"(y), "r"(z)); ret; })
528 # define Maj(x,y,z) ({ SHA_LONG64 ret; \
529 asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
531 : "r"(x^z), "r"(y), "r"(x)); ret; })
/* MSVC: _rotr64 intrinsic on Win64; _asm-based __pull64be helper on x86. */
533 # elif defined(_MSC_VER)
534 # if defined(_WIN64) /* applies to both IA-64 and AMD64 */
535 # pragma intrinsic(_rotr64)
536 # define ROTR(a,n) _rotr64((a),n)
538 # if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
539 !defined(OPENSSL_NO_INLINE_ASM)
540 # if defined(I386_ONLY)
541 static SHA_LONG64 __fastcall __pull64be(const void *x)
543 _asm mov edx,[ecx + 0]
544 _asm mov eax,[ecx + 4]
553 static SHA_LONG64 __fastcall __pull64be(const void *x)
555 _asm mov edx,[ecx + 0]
556 _asm mov eax,[ecx + 4]
561 # define PULL64(x) __pull64be(&(x))
/*
 * Plain-C definitions of the SHA-512 primitives.
 * NOTE(review): presumably each is wrapped in an #ifndef that is not shown
 * in this listing, so that it only applies when no target-specific version
 * was defined above -- confirm.
 */
/*
 * B(x,j): byte j of the object x, moved to bit position (7-j)*8, so byte 0
 * becomes the most significant byte. PULL64 ORs all eight together, giving
 * a big-endian load that does not depend on host byte order.
 * NOTE(review): x and j are used unparenthesized (&x, 7-j); the only use
 * visible here passes a plain lvalue and integer literals.
 */
566 # define B(x,j) (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
567 # define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
/*
 * 64-bit rotate right by s.
 * NOTE(review): s is not parenthesized and s == 0 would shift by 64; the
 * uses below pass literals between 1 and 61 only.
 */
570 # define ROTR(x,s) (((x)>>s) | (x)<<(64-s))
/* XOR of three rotations: by 28, 34 and 39. */
573 # define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
/* XOR of three rotations: by 14, 18 and 41. */
576 # define Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
/* Rotations by 1 and 8 XORed with a logical shift right by 7. */
579 # define sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
/* Rotations by 19 and 61 XORed with a logical shift right by 6. */
582 # define sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
/* Bitwise choose: bits of y where x is 1, bits of z where x is 0. */
585 # define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
/* Bitwise majority of x, y and z. */
588 # define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
/*
 * Block function variant selected for 32-bit x86 targets. Working values
 * live in the array X, addressed through the pointer F, which moves down by
 * one element per round (F-- in both loops) in place of shuffling eight
 * separate variables.
 * NOTE(review): most of the body (declarations of A, E, T and i, loading of
 * the state, the outer per-block loop, the final state update) is missing
 * from this listing; comments below describe only the visible statements.
 */
591 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
593 * This code should give better results on 32-bit CPU with less than
594 * ~24 registers, both size and performance wise...
597 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
600 const SHA_LONG64 *W = in;
/* 9 + 80 words of scratch space. */
602 SHA_LONG64 X[9 + 80], *F;
/* Rounds 0..15. */
617 for (i = 0; i < 16; i++, F--) {
626 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
628 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
/*
 * Rounds 16..79: T is first built from sigma0 and sigma1 of two earlier
 * entries plus two further earlier entries, all read at fixed offsets
 * from F.
 */
631 for (; i < 80; i++, F--) {
632 T = sigma0(F[8 + 16 - 1]);
633 T += sigma1(F[8 + 16 - 14]);
634 T += F[8 + 16] + F[8 + 16 - 9];
639 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
641 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
/*
 * Block function variant selected by OPENSSL_SMALL_FOOTPRINT: two plain
 * loops (rounds 0..15 and 16..79) over eight named working variables, with
 * the schedule array X indexed modulo 16.
 * NOTE(review): the declaration of X and i, the loading and updating of the
 * state, the per-round variable shuffle and the outer per-block loop are
 * missing from this listing.
 */
657 # elif defined(OPENSSL_SMALL_FOOTPRINT)
659 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
662 const SHA_LONG64 *W = in;
663 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
/* Rounds 0..15: the message word is loaded with PULL64 and kept in X[i]. */
678 for (i = 0; i < 16; i++) {
682 T1 = X[i] = PULL64(W[i]);
684 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
685 T2 = Sigma0(a) + Maj(a, b, c);
/*
 * Rounds 16..79: the new schedule word replaces X[i & 0xf] in place and is
 * formed from the entries at offsets +1, +14 and +9 (mod 16).
 * NOTE(review): presumably sigma0/sigma1 are applied to s0/s1 on lines that
 * are not shown -- confirm.
 */
696 for (; i < 80; i++) {
697 s0 = X[(i + 1) & 0x0f];
699 s1 = X[(i + 14) & 0x0f];
702 T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
703 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
704 T2 = Sigma0(a) + Maj(a, b, c);
/*
 * One round, for the unrolled block function. T1 must already hold the
 * message word on entry. The macro adds h, Sigma1(e), Ch(e,f,g) and K512[i]
 * to T1, then overwrites h with Sigma0(a) + Maj(a,b,c) + T1 and adds T1 to
 * d. Callers rotate the argument names from round to round, so no values
 * are moved between variables.
 */
729 # define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
730 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
731 h = Sigma0(a) + Maj(a,b,c); \
732 d += T1; h += T1; } while (0)
/*
 * One round with message-schedule expansion. X is used as a 16-entry ring:
 * entry j is replaced by itself plus sigma0 of entry j+1, sigma1 of entry
 * j+14 and entry j+9 (indices mod 16), the result is left in T1, and
 * ROUND_00_15 then runs with round-constant index i+j. Uses the caller's
 * s0, s1 and T1 variables.
 */
734 # define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) do { \
735 s0 = X[(j+1)&0x0f]; s0 = sigma0(s0); \
736 s1 = X[(j+14)&0x0f]; s1 = sigma1(s1); \
737 T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f]; \
738 ROUND_00_15(i+j,a,b,c,d,e,f,g,h); } while (0)
/*
 * Unrolled block function variant: sixteen explicit ROUND_00_15 expansions
 * followed by a loop of sixteen ROUND_16_80 expansions. The eight working
 * variables are passed in rotated order each round, so their roles shift
 * without any copying.
 * NOTE(review): the declaration of X and i, the loading and updating of the
 * state, the outer per-block loop and the preprocessor lines separating the
 * two copies of rounds 0..15 are missing from this listing.
 */
740 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
743 const SHA_LONG64 *W = in;
744 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
/*
 * Rounds 0..15, first copy. The statements that set T1 and X[] before each
 * round are not visible here. Presumably this is the path for hosts whose
 * byte order already matches the input, so no PULL64 is needed -- confirm.
 */
761 ROUND_00_15(0, a, b, c, d, e, f, g, h);
763 ROUND_00_15(1, h, a, b, c, d, e, f, g);
765 ROUND_00_15(2, g, h, a, b, c, d, e, f);
767 ROUND_00_15(3, f, g, h, a, b, c, d, e);
769 ROUND_00_15(4, e, f, g, h, a, b, c, d);
771 ROUND_00_15(5, d, e, f, g, h, a, b, c);
773 ROUND_00_15(6, c, d, e, f, g, h, a, b);
775 ROUND_00_15(7, b, c, d, e, f, g, h, a);
777 ROUND_00_15(8, a, b, c, d, e, f, g, h);
779 ROUND_00_15(9, h, a, b, c, d, e, f, g);
781 ROUND_00_15(10, g, h, a, b, c, d, e, f);
783 ROUND_00_15(11, f, g, h, a, b, c, d, e);
785 ROUND_00_15(12, e, f, g, h, a, b, c, d);
787 ROUND_00_15(13, d, e, f, g, h, a, b, c);
789 ROUND_00_15(14, c, d, e, f, g, h, a, b);
791 ROUND_00_15(15, b, c, d, e, f, g, h, a);
/*
 * Rounds 0..15, second copy: each message word is loaded through PULL64,
 * saved in X[] for the later schedule expansion, and handed to the round
 * in T1.
 */
793 T1 = X[0] = PULL64(W[0]);
794 ROUND_00_15(0, a, b, c, d, e, f, g, h);
795 T1 = X[1] = PULL64(W[1]);
796 ROUND_00_15(1, h, a, b, c, d, e, f, g);
797 T1 = X[2] = PULL64(W[2]);
798 ROUND_00_15(2, g, h, a, b, c, d, e, f);
799 T1 = X[3] = PULL64(W[3]);
800 ROUND_00_15(3, f, g, h, a, b, c, d, e);
801 T1 = X[4] = PULL64(W[4]);
802 ROUND_00_15(4, e, f, g, h, a, b, c, d);
803 T1 = X[5] = PULL64(W[5]);
804 ROUND_00_15(5, d, e, f, g, h, a, b, c);
805 T1 = X[6] = PULL64(W[6]);
806 ROUND_00_15(6, c, d, e, f, g, h, a, b);
807 T1 = X[7] = PULL64(W[7]);
808 ROUND_00_15(7, b, c, d, e, f, g, h, a);
809 T1 = X[8] = PULL64(W[8]);
810 ROUND_00_15(8, a, b, c, d, e, f, g, h);
811 T1 = X[9] = PULL64(W[9]);
812 ROUND_00_15(9, h, a, b, c, d, e, f, g);
813 T1 = X[10] = PULL64(W[10]);
814 ROUND_00_15(10, g, h, a, b, c, d, e, f);
815 T1 = X[11] = PULL64(W[11]);
816 ROUND_00_15(11, f, g, h, a, b, c, d, e);
817 T1 = X[12] = PULL64(W[12]);
818 ROUND_00_15(12, e, f, g, h, a, b, c, d);
819 T1 = X[13] = PULL64(W[13]);
820 ROUND_00_15(13, d, e, f, g, h, a, b, c);
821 T1 = X[14] = PULL64(W[14]);
822 ROUND_00_15(14, c, d, e, f, g, h, a, b);
823 T1 = X[15] = PULL64(W[15]);
824 ROUND_00_15(15, b, c, d, e, f, g, h, a);
/*
 * Rounds 16..79: four passes of sixteen rounds; i is the base round number
 * and the second macro argument is the offset within the pass.
 */
827 for (i = 16; i < 80; i += 16) {
828 ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
829 ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
830 ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
831 ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
832 ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
833 ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
834 ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
835 ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
836 ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
837 ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
838 ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
839 ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
840 ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
841 ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
842 ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
843 ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
861 #endif /* SHA512_ASM */