/* ====================================================================
 * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
 * according to the OpenSSL license [found in ../../LICENSE].
 * ====================================================================
 */
#include <openssl/opensslconf.h>
/*-
 * IMPLEMENTATION NOTES.
 *
 * As you might have noticed, 32-bit hash algorithms:
 *
 * - permit SHA_LONG to be wider than 32-bit
 * - optimized versions implement two transform functions: one operating
 *   on [aligned] data in host byte order and one - on data in input
 *   stream byte order;
 * - share common byte-order neutral collector and padding function
 *   implementations, ../md32_common.h;
 *
 * Neither of the above applies to this SHA-512 implementation. Reasons
 * [in reverse order] are:
 *
 * - it's the only 64-bit hash algorithm for the moment of this writing,
 *   there is no need for common collector/padding implementation [yet];
 * - by supporting only one transform function [which operates on
 *   *aligned* data in input stream byte order, big-endian in this case]
 *   we minimize burden of maintenance in two ways: a) collector/padding
 *   function is simpler; b) only one transform function to stare at;
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
 *   apply a number of optimizations to mitigate potential performance
 *   penalties caused by previous design decision;
 *
 * Implementation relies on the fact that "long long" is 64-bit on
 * both 32- and 64-bit platforms. If some compiler vendor comes up
 * with 128-bit long long, adjustment to sha.h would be required.
 * As this implementation relies on 64-bit integer type, it's totally
 * inappropriate for platforms which don't support it, most notably
 * 16-bit platforms.
 *
 * <appro@fy.chalmers.se>
 */
45 #include <openssl/crypto.h>
46 #include <openssl/sha.h>
47 #include <openssl/opensslv.h>
49 #include "internal/cryptlib.h"
/*
 * On these targets either the CPU tolerates unaligned 64-bit loads or the
 * assembler implementation takes care of alignment itself, so the collector
 * may feed input to the transform without copying it into the aligned
 * context buffer first.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
    defined(__s390__) || defined(__s390x__) || \
    defined(__aarch64__) || \
    defined(SHA512_ASM)
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif
59 int SHA384_Init(SHA512_CTX *c)
61 c->h[0] = U64(0xcbbb9d5dc1059ed8);
62 c->h[1] = U64(0x629a292a367cd507);
63 c->h[2] = U64(0x9159015a3070dd17);
64 c->h[3] = U64(0x152fecd8f70e5939);
65 c->h[4] = U64(0x67332667ffc00b31);
66 c->h[5] = U64(0x8eb44a8768581511);
67 c->h[6] = U64(0xdb0c2e0d64f98fa7);
68 c->h[7] = U64(0x47b5481dbefa4fa4);
73 c->md_len = SHA384_DIGEST_LENGTH;
77 int SHA512_Init(SHA512_CTX *c)
79 c->h[0] = U64(0x6a09e667f3bcc908);
80 c->h[1] = U64(0xbb67ae8584caa73b);
81 c->h[2] = U64(0x3c6ef372fe94f82b);
82 c->h[3] = U64(0xa54ff53a5f1d36f1);
83 c->h[4] = U64(0x510e527fade682d1);
84 c->h[5] = U64(0x9b05688c2b3e6c1f);
85 c->h[6] = U64(0x1f83d9abfb41bd6b);
86 c->h[7] = U64(0x5be0cd19137e2179);
91 c->md_len = SHA512_DIGEST_LENGTH;
98 void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
100 int SHA512_Final(unsigned char *md, SHA512_CTX *c)
102 unsigned char *p = (unsigned char *)c->u.p;
105 p[n] = 0x80; /* There always is a room for one */
107 if (n > (sizeof(c->u) - 16)) {
108 memset(p + n, 0, sizeof(c->u) - n);
110 sha512_block_data_order(c, p, 1);
113 memset(p + n, 0, sizeof(c->u) - 16 - n);
115 c->u.d[SHA_LBLOCK - 2] = c->Nh;
116 c->u.d[SHA_LBLOCK - 1] = c->Nl;
118 p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
119 p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
120 p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
121 p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
122 p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
123 p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
124 p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
125 p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
126 p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
127 p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
128 p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
129 p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
130 p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
131 p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
132 p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
133 p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
136 sha512_block_data_order(c, p, 1);
142 /* Let compiler decide if it's appropriate to unroll... */
143 case SHA384_DIGEST_LENGTH:
144 for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
145 SHA_LONG64 t = c->h[n];
147 *(md++) = (unsigned char)(t >> 56);
148 *(md++) = (unsigned char)(t >> 48);
149 *(md++) = (unsigned char)(t >> 40);
150 *(md++) = (unsigned char)(t >> 32);
151 *(md++) = (unsigned char)(t >> 24);
152 *(md++) = (unsigned char)(t >> 16);
153 *(md++) = (unsigned char)(t >> 8);
154 *(md++) = (unsigned char)(t);
157 case SHA512_DIGEST_LENGTH:
158 for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
159 SHA_LONG64 t = c->h[n];
161 *(md++) = (unsigned char)(t >> 56);
162 *(md++) = (unsigned char)(t >> 48);
163 *(md++) = (unsigned char)(t >> 40);
164 *(md++) = (unsigned char)(t >> 32);
165 *(md++) = (unsigned char)(t >> 24);
166 *(md++) = (unsigned char)(t >> 16);
167 *(md++) = (unsigned char)(t >> 8);
168 *(md++) = (unsigned char)(t);
171 /* ... as well as make sure md_len is not abused. */
179 int SHA384_Final(unsigned char *md, SHA512_CTX *c)
181 return SHA512_Final(md, c);
184 int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
187 unsigned char *p = c->u.p;
188 const unsigned char *data = (const unsigned char *)_data;
193 l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
196 if (sizeof(len) >= 8)
197 c->Nh += (((SHA_LONG64) len) >> 61);
201 size_t n = sizeof(c->u) - c->num;
204 memcpy(p + c->num, data, len), c->num += (unsigned int)len;
207 memcpy(p + c->num, data, n), c->num = 0;
209 sha512_block_data_order(c, p, 1);
213 if (len >= sizeof(c->u)) {
214 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
215 if ((size_t)data % sizeof(c->u.d[0]) != 0)
216 while (len >= sizeof(c->u))
217 memcpy(p, data, sizeof(c->u)),
218 sha512_block_data_order(c, p, 1),
219 len -= sizeof(c->u), data += sizeof(c->u);
222 sha512_block_data_order(c, data, len / sizeof(c->u)),
223 data += len, len %= sizeof(c->u), data -= len;
227 memcpy(p, data, len), c->num = (int)len;
232 int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
234 return SHA512_Update(c, data, len);
237 void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
239 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
240 if ((size_t)data % sizeof(c->u.d[0]) != 0)
241 memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
243 sha512_block_data_order(c, data, 1);
246 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
249 static unsigned char m[SHA384_DIGEST_LENGTH];
254 SHA512_Update(&c, d, n);
255 SHA512_Final(md, &c);
256 OPENSSL_cleanse(&c, sizeof(c));
260 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
263 static unsigned char m[SHA512_DIGEST_LENGTH];
268 SHA512_Update(&c, d, n);
269 SHA512_Final(md, &c);
270 OPENSSL_cleanse(&c, sizeof(c));
275 static const SHA_LONG64 K512[80] = {
276 U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
277 U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
278 U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
279 U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
280 U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
281 U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
282 U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
283 U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
284 U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
285 U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
286 U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
287 U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
288 U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
289 U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
290 U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
291 U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
292 U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
293 U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
294 U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
295 U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
296 U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
297 U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
298 U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
299 U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
300 U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
301 U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
302 U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
303 U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
304 U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
305 U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
306 U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
307 U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
308 U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
309 U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
310 U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
311 U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
312 U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
313 U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
314 U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
315 U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
/*
 * Per-platform primitives: ROTR (64-bit rotate right) and PULL64
 * (big-endian 64-bit load). Inline asm / intrinsic versions where
 * available; portable byte-at-a-time fallbacks at the bottom.
 * NOTE(review): reconstructed preprocessor nesting — verify it matches
 * upstream before merging.
 */
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#  if defined(__x86_64) || defined(__x86_64__)
#   define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                asm ("rorq %1,%0"       \
                                : "=r"(ret)             \
                                : "J"(n),"0"(a)         \
                                : "cc"); ret;           })
#   if !defined(B_ENDIAN)
#    define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
                                asm ("bswapq    %0"     \
                                : "=r"(ret)             \
                                : "0"(ret)); ret;       })
#   endif
#  elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#   if defined(I386_ONLY)
#    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                         unsigned int hi=p[0],lo=p[1];          \
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"  \
                                    "roll $16,%%eax; roll $16,%%edx; "  \
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"  \
                                : "=a"(lo),"=d"(hi)             \
                                : "0"(lo),"1"(hi) : "cc");      \
                                ((SHA_LONG64)hi)<<32|lo;        })
#   else
#    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                         unsigned int hi=p[0],lo=p[1];  \
                                asm ("bswapl %0; bswapl %1;"    \
                                : "=r"(lo),"=r"(hi)             \
                                : "0"(lo),"1"(hi));             \
                                ((SHA_LONG64)hi)<<32|lo;        })
#   endif
#  elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
#   define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                asm ("rotrdi %0,%1,%2"  \
                                : "=r"(ret)             \
                                : "r"(a),"K"(n)); ret;  })
#  elif defined(__aarch64__)
#   define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                asm ("ror %0,%1,%2"     \
                                : "=r"(ret)             \
                                : "r"(a),"I"(n)); ret;  })
#   if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
#    define PULL64(x)   ({ SHA_LONG64 ret;                      \
                                asm ("rev       %0,%1"          \
                                : "=r"(ret)                     \
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
#   endif
#  endif
# elif defined(_MSC_VER)
#  if defined(_WIN64)           /* applies to both IA-64 and AMD64 */
#   pragma intrinsic(_rotr64)
#   define ROTR(a,n)    _rotr64((a),n)
#  endif
#  if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#   if defined(I386_ONLY)
/* Big-endian 64-bit load without bswap (pre-486): byte swaps via
 * xchg/rol. Result is returned in edx:eax per __fastcall convention. */
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
    _asm xchg dh, dl
    _asm xchg ah, al
    _asm rol edx, 16
    _asm rol eax, 16
    _asm xchg dh, dl
    _asm xchg ah, al
}
#   else
/* Big-endian 64-bit load using bswap; result in edx:eax. */
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
    _asm bswap edx
    _asm bswap eax
}
#   endif
#   define PULL64(x) __pull64be(&(x))
#   if _MSC_VER <= 1200
#    pragma inline_depth(0)
#   endif
#  endif
# endif
# ifndef PULL64
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
#  define ROTR(x,s)     (((x)>>s) | (x)<<(64-s))
# endif
/* SHA-512 logical functions (FIPS 180-4, section 4.1.3). */
# define Sigma0(x)      (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# define Sigma1(x)      (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
# define sigma0(x)      (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
# define sigma1(x)      (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
# define Ch(x,y,z)      (((x) & (y)) ^ ((~(x)) & (z)))
# define Maj(x,y,z)     (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
409 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
411 * This code should give better results on 32-bit CPU with less than
412 * ~24 registers, both size and performance wise...
413 */ static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
416 const SHA_LONG64 *W = in;
418 SHA_LONG64 X[9 + 80], *F;
433 for (i = 0; i < 16; i++, F--) {
442 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
444 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
447 for (; i < 80; i++, F--) {
448 T = sigma0(F[8 + 16 - 1]);
449 T += sigma1(F[8 + 16 - 14]);
450 T += F[8 + 16] + F[8 + 16 - 9];
455 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
457 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
473 # elif defined(OPENSSL_SMALL_FOOTPRINT)
474 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
477 const SHA_LONG64 *W = in;
478 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
493 for (i = 0; i < 16; i++) {
497 T1 = X[i] = PULL64(W[i]);
499 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
500 T2 = Sigma0(a) + Maj(a, b, c);
511 for (; i < 80; i++) {
512 s0 = X[(i + 1) & 0x0f];
514 s1 = X[(i + 14) & 0x0f];
517 T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
518 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
519 T2 = Sigma0(a) + Maj(a, b, c);
544 # define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
545 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
546 h = Sigma0(a) + Maj(a,b,c); \
547 d += T1; h += T1; } while (0)
548 # define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) do { \
549 s0 = X[(j+1)&0x0f]; s0 = sigma0(s0); \
550 s1 = X[(j+14)&0x0f]; s1 = sigma1(s1); \
551 T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f]; \
552 ROUND_00_15(i+j,a,b,c,d,e,f,g,h); } while (0)
553 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
556 const SHA_LONG64 *W = in;
557 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
574 ROUND_00_15(0, a, b, c, d, e, f, g, h);
576 ROUND_00_15(1, h, a, b, c, d, e, f, g);
578 ROUND_00_15(2, g, h, a, b, c, d, e, f);
580 ROUND_00_15(3, f, g, h, a, b, c, d, e);
582 ROUND_00_15(4, e, f, g, h, a, b, c, d);
584 ROUND_00_15(5, d, e, f, g, h, a, b, c);
586 ROUND_00_15(6, c, d, e, f, g, h, a, b);
588 ROUND_00_15(7, b, c, d, e, f, g, h, a);
590 ROUND_00_15(8, a, b, c, d, e, f, g, h);
592 ROUND_00_15(9, h, a, b, c, d, e, f, g);
594 ROUND_00_15(10, g, h, a, b, c, d, e, f);
596 ROUND_00_15(11, f, g, h, a, b, c, d, e);
598 ROUND_00_15(12, e, f, g, h, a, b, c, d);
600 ROUND_00_15(13, d, e, f, g, h, a, b, c);
602 ROUND_00_15(14, c, d, e, f, g, h, a, b);
604 ROUND_00_15(15, b, c, d, e, f, g, h, a);
606 T1 = X[0] = PULL64(W[0]);
607 ROUND_00_15(0, a, b, c, d, e, f, g, h);
608 T1 = X[1] = PULL64(W[1]);
609 ROUND_00_15(1, h, a, b, c, d, e, f, g);
610 T1 = X[2] = PULL64(W[2]);
611 ROUND_00_15(2, g, h, a, b, c, d, e, f);
612 T1 = X[3] = PULL64(W[3]);
613 ROUND_00_15(3, f, g, h, a, b, c, d, e);
614 T1 = X[4] = PULL64(W[4]);
615 ROUND_00_15(4, e, f, g, h, a, b, c, d);
616 T1 = X[5] = PULL64(W[5]);
617 ROUND_00_15(5, d, e, f, g, h, a, b, c);
618 T1 = X[6] = PULL64(W[6]);
619 ROUND_00_15(6, c, d, e, f, g, h, a, b);
620 T1 = X[7] = PULL64(W[7]);
621 ROUND_00_15(7, b, c, d, e, f, g, h, a);
622 T1 = X[8] = PULL64(W[8]);
623 ROUND_00_15(8, a, b, c, d, e, f, g, h);
624 T1 = X[9] = PULL64(W[9]);
625 ROUND_00_15(9, h, a, b, c, d, e, f, g);
626 T1 = X[10] = PULL64(W[10]);
627 ROUND_00_15(10, g, h, a, b, c, d, e, f);
628 T1 = X[11] = PULL64(W[11]);
629 ROUND_00_15(11, f, g, h, a, b, c, d, e);
630 T1 = X[12] = PULL64(W[12]);
631 ROUND_00_15(12, e, f, g, h, a, b, c, d);
632 T1 = X[13] = PULL64(W[13]);
633 ROUND_00_15(13, d, e, f, g, h, a, b, c);
634 T1 = X[14] = PULL64(W[14]);
635 ROUND_00_15(14, c, d, e, f, g, h, a, b);
636 T1 = X[15] = PULL64(W[15]);
637 ROUND_00_15(15, b, c, d, e, f, g, h, a);
640 for (i = 16; i < 80; i += 16) {
641 ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
642 ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
643 ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
644 ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
645 ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
646 ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
647 ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
648 ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
649 ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
650 ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
651 ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
652 ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
653 ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
654 ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
655 ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
656 ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
674 #endif /* SHA512_ASM */