1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3 * Copyright (c) 2004 The OpenSSL Project. All rights reserved
4 * according to the OpenSSL license [found in ../../LICENSE].
5 * ====================================================================
8 * IMPLEMENTATION NOTES.
10 * As you might have noticed 32-bit hash algorithms:
12 * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
13 * - optimized versions implement two transform functions: one operating
14 * on [aligned] data in host byte order and one - on data in input
16 * - share common byte-order neutral collector and padding function
17 * implementations, ../md32_common.h;
19 * Neither of the above applies to this SHA-512 implementation. Reasons
20 * [in reverse order] are:
22 * - it's the only 64-bit hash algorithm at the time of this writing,
23 * there is no need for common collector/padding implementation [yet];
24 * - by supporting only one transform function [which operates on
25 * *aligned* data in input stream byte order, big-endian in this case]
26 * we minimize burden of maintenance in two ways: a) collector/padding
27 * function is simpler; b) only one transform function to stare at;
28 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
29 * apply a number of optimizations to mitigate potential performance
30 * penalties caused by previous design decision;
34 * Implementation relies on the fact that "long long" is 64-bit on
35 * both 32- and 64-bit platforms. If some compiler vendor comes up
36 * with 128-bit long long, adjustment to sha.h would be required.
37 * As this implementation relies on 64-bit integer type, it's totally
38 * inappropriate for platforms which don't support it, most notably
40 * <appro@fy.chalmers.se>
45 #include <openssl/opensslconf.h>
46 #include <openssl/crypto.h>
47 #include <openssl/sha.h>
48 #include <openssl/opensslv.h>
50 const char *SHA512_version="SHA-512" OPENSSL_VERSION_PTEXT;
52 #if defined(_M_IX86) || defined(_M_AMD64) || defined(__i386) || defined(__x86_64)
53 #define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
56 int SHA384_Init (SHA512_CTX *c)
58 c->h[0]=U64(0xcbbb9d5dc1059ed8);
59 c->h[1]=U64(0x629a292a367cd507);
60 c->h[2]=U64(0x9159015a3070dd17);
61 c->h[3]=U64(0x152fecd8f70e5939);
62 c->h[4]=U64(0x67332667ffc00b31);
63 c->h[5]=U64(0x8eb44a8768581511);
64 c->h[6]=U64(0xdb0c2e0d64f98fa7);
65 c->h[7]=U64(0x47b5481dbefa4fa4);
71 int SHA512_Init (SHA512_CTX *c)
73 c->h[0]=U64(0x6a09e667f3bcc908);
74 c->h[1]=U64(0xbb67ae8584caa73b);
75 c->h[2]=U64(0x3c6ef372fe94f82b);
76 c->h[3]=U64(0xa54ff53a5f1d36f1);
77 c->h[4]=U64(0x510e527fade682d1);
78 c->h[5]=U64(0x9b05688c2b3e6c1f);
79 c->h[6]=U64(0x1f83d9abfb41bd6b);
80 c->h[7]=U64(0x5be0cd19137e2179);
86 static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num);
88 static int sha512_final (unsigned char *md, SHA512_CTX *c, size_t msz)
90 unsigned char *p=(unsigned char *)c->u.p;
95 if (n > (sizeof(c->u)-16))
96 memset (p+n,0,sizeof(c->u)-n), n=0,
99 memset (p+n,0,sizeof(c->u)-16-n);
101 c->u.d[SHA_LBLOCK-2] = c->Nh;
102 c->u.d[SHA_LBLOCK-1] = c->Nl;
104 p[sizeof(c->u)-1] = (c->Nl)&0xFF;
105 p[sizeof(c->u)-2] = (c->Nl>>8)&0xFF;
106 p[sizeof(c->u)-3] = (c->Nl>>16)&0xFF;
107 p[sizeof(c->u)-4] = (c->Nl>>24)&0xFF;
108 p[sizeof(c->u)-5] = (c->Nl>>32)&0xFF;
109 p[sizeof(c->u)-6] = (c->Nl>>40)&0xFF;
110 p[sizeof(c->u)-7] = (c->Nl>>48)&0xFF;
111 p[sizeof(c->u)-8] = (c->Nl>>56)&0xFF;
112 p[sizeof(c->u)-9] = (c->Nh)&0xFF;
113 p[sizeof(c->u)-10] = (c->Nh>>8)&0xFF;
114 p[sizeof(c->u)-11] = (c->Nh>>16)&0xFF;
115 p[sizeof(c->u)-12] = (c->Nh>>24)&0xFF;
116 p[sizeof(c->u)-13] = (c->Nh>>32)&0xFF;
117 p[sizeof(c->u)-14] = (c->Nh>>40)&0xFF;
118 p[sizeof(c->u)-15] = (c->Nh>>48)&0xFF;
119 p[sizeof(c->u)-16] = (c->Nh>>56)&0xFF;
122 sha512_block (c,p,1);
126 for (n=0;msz>0;n++,msz-=8)
128 SHA_LONG64 t = c->h[n];
130 *(md++) = (t>>56)&0xFF; *(md++) = (t>>48)&0xFF;
131 *(md++) = (t>>40)&0xFF; *(md++) = (t>>32)&0xFF;
132 *(md++) = (t>>24)&0xFF; *(md++) = (t>>16)&0xFF;
133 *(md++) = (t>>8)&0xFF; *(md++) = (t)&0xFF;
139 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
140 { return sha512_final (md,c,SHA384_DIGEST_LENGTH); }
141 int SHA512_Final (unsigned char *md,SHA512_CTX *c)
142 { return sha512_final (md,c,SHA512_DIGEST_LENGTH); }
144 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
147 unsigned char *p=c->u.p,*data=(unsigned char *)_data;
149 if (len==0) return 1;
151 l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
152 if (l < c->Nl) c->Nh++;
153 if (sizeof(len)>=8) c->Nh+=(((SHA_LONG64)len)>>61);
158 size_t n = sizeof(c->u) - c->num;
162 memcpy (p+c->num,data,len), c->num += len;
166 memcpy (p+c->num,data,n), c->num = 0;
168 sha512_block (c,p,1);
172 if (len >= sizeof(c->u))
174 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
175 if ((size_t)data%sizeof(c->u.d[0]) != 0)
176 while (len >= sizeof(c->u))
177 memcpy (p,data,sizeof(c->u)),
178 sha512_block (c,p,1),
180 data += sizeof(c->u);
183 sha512_block (c,data,len/sizeof(c->u)),
189 if (len != 0) memcpy (p,data,len), c->num = (int)len;
194 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
195 { return SHA512_Update (c,data,len); }
197 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
198 { sha512_block (c,data,1); }
200 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
203 static unsigned char m[SHA384_DIGEST_LENGTH];
205 if (md == NULL) md=m;
207 SHA512_Update(&c,d,n);
208 sha512_final(md,&c,sizeof(m));
209 OPENSSL_cleanse(&c,sizeof(c));
213 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
216 static unsigned char m[SHA512_DIGEST_LENGTH];
218 if (md == NULL) md=m;
220 SHA512_Update(&c,d,n);
221 sha512_final(md,&c,sizeof(m));
222 OPENSSL_cleanse(&c,sizeof(c));
226 static const SHA_LONG64 K512[80] = {
227 U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
228 U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
229 U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
230 U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
231 U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
232 U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
233 U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
234 U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
235 U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
236 U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
237 U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
238 U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
239 U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
240 U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
241 U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
242 U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
243 U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
244 U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
245 U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
246 U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
247 U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
248 U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
249 U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
250 U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
251 U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
252 U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
253 U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
254 U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
255 U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
256 U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
257 U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
258 U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
259 U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
260 U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
261 U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
262 U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
263 U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
264 U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
265 U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
266 U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
269 # if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
270 # if defined(__x86_64) || defined(__x86_64__)
271 # define PULL64(x) ({ SHA_LONG64 ret=*((SHA_LONG64 *)(&(x))); \
/*
 * Portable byte-wise 64-bit load.
 *
 * B(x,j) reads byte j (0..7) of the object x through an unsigned char
 * pointer and shifts it so that byte 0 ends up in the most significant
 * position. PULL64(x) ORs the eight bytes together, i.e. it interprets the
 * 8 bytes starting at &x as a big-endian 64-bit value regardless of host
 * byte order or alignment.
 *
 * Both macro arguments are fully parenthesized so that expression arguments
 * such as B(x,k+1) index and shift by the same amount, and the byte pointer
 * is const-qualified because the data is only read.
 */
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
285 # if defined(_MSC_VER)
286 # if defined(_WIN64) /* applies to both IA-64 and AMD64 */
287 # define ROTR(a,n) _rotr64((a),n)
289 # elif defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
290 # if defined(__x86_64) || defined(__x86_64__)
291 # define ROTR(a,n) ({ unsigned long ret; \
296 # elif defined(_ARCH_PPC) && defined(__64BIT__)
297 # define ROTR(a,n) ({ unsigned long ret; \
298 asm ("rotrdi %0,%1,%2" \
300 : "r"(a),"K"(n)); ret; })
/*
 * Generic fallback: rotate the 64-bit value x right by s bits.
 * s must satisfy 0 < s < 64 (a shift by 64 would be undefined).
 * Arguments and both shifts are parenthesized so that an expression such as
 * ROTR(v,a+b) rotates by (a+b) instead of shifting left by 64-a+b.
 */
#define ROTR(x,s)	(((x)>>(s)) | ((x)<<(64-(s))))
/*
 * The four SHA-512 mixing functions. Each XORs three terms of its argument:
 * the upper-case pair uses three ROTR rotations, the lower-case pair uses
 * two ROTR rotations plus one plain right shift.
 */
#define Sigma0(w)	(ROTR((w),28) ^ ROTR((w),34) ^ ROTR((w),39))
#define Sigma1(w)	(ROTR((w),14) ^ ROTR((w),18) ^ ROTR((w),41))
#define sigma0(w)	(ROTR((w),1)  ^ ROTR((w),8)  ^ ((w)>>7))
#define sigma1(w)	(ROTR((w),19) ^ ROTR((w),61) ^ ((w)>>6))
/*
 * Bitwise selector functions used by the round body.
 * Ch:  for each bit, take q where p is 1 and r where p is 0.
 * Maj: for each bit, the value held by at least two of p, q, r.
 */
#define Ch(p,q,r)	(((p) & (q)) ^ ((~(p)) & (r)))
#define Maj(p,q,r)	(((p) & (q)) ^ ((p) & (r)) ^ ((q) & (r)))
317 #if defined(OPENSSL_IA32_SSE2) && !defined(OPENSSL_NO_ASM)
318 #define GO_FOR_SSE2(ctx,in,num) do { \
319 extern int OPENSSL_ia32cap; \
320 void sha512_block_sse2(void *,const void *,size_t); \
321 if (!(OPENSSL_ia32cap & (1<<26))) break; \
322 sha512_block_sse2(ctx->h,in,num); return; \
326 #ifdef OPENSSL_SMALL_FOOTPRINT
328 static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num)
330 const SHA_LONG64 *W=in;
331 SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1,T2;
336 GO_FOR_SSE2(ctx,in,num);
341 a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
342 e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
349 T1 = X[i] = PULL64(W[i]);
351 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
352 T2 = Sigma0(a) + Maj(a,b,c);
353 h = g; g = f; f = e; e = d + T1;
354 d = c; c = b; b = a; a = T1 + T2;
359 s0 = X[(i+1)&0x0f]; s0 = sigma0(s0);
360 s1 = X[(i+14)&0x0f]; s1 = sigma1(s1);
362 T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
363 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
364 T2 = Sigma0(a) + Maj(a,b,c);
365 h = g; g = f; f = e; e = d + T1;
366 d = c; c = b; b = a; a = T1 + T2;
369 ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
370 ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
/*
 * Common round body.
 *
 * On entry T1 must already hold the message word for round i (callers
 * assign it just before invoking the macro). The macro adds the round
 * terms into T1, then folds T1 into d and replaces h with
 * Sigma0(a) + Maj(a,b,c) + T1. Only T1, d and h are written; callers
 * rotate the argument order between rounds instead of moving values
 * between variables.
 */
#define ROUND_00_15(i,a,b,c,d,e,f,g,h)	do {			\
	T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];		\
	d  += T1;						\
	h   = Sigma0(a) + Maj(a,b,c) + T1;			\
	} while (0)
/*
 * Round body for the rounds that follow the first sixteen (per the macro
 * name). X is used as a 16-entry circular message schedule: the slot for
 * round i is updated in place from the slots at offsets +1, +9 and +14
 * (mod 16), the new word is loaded into T1, and the common round body
 * ROUND_00_15 is then executed. s0 and s1 are scratch variables supplied
 * by the enclosing function.
 */
#define ROUND_16_80(i,a,b,c,d,e,f,g,h,X)	do {		\
	s0 = sigma0(X[(i+1)&0x0f]);				\
	s1 = sigma1(X[(i+14)&0x0f]);				\
	X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f];			\
	T1 = X[(i)&0x0f];					\
	ROUND_00_15(i,a,b,c,d,e,f,g,h);				\
	} while (0)
389 static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num)
391 const SHA_LONG64 *W=in;
392 SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1;
397 GO_FOR_SSE2(ctx,in,num);
402 a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
403 e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
406 T1 = X[0] = W[0]; ROUND_00_15(0,a,b,c,d,e,f,g,h);
407 T1 = X[1] = W[1]; ROUND_00_15(1,h,a,b,c,d,e,f,g);
408 T1 = X[2] = W[2]; ROUND_00_15(2,g,h,a,b,c,d,e,f);
409 T1 = X[3] = W[3]; ROUND_00_15(3,f,g,h,a,b,c,d,e);
410 T1 = X[4] = W[4]; ROUND_00_15(4,e,f,g,h,a,b,c,d);
411 T1 = X[5] = W[5]; ROUND_00_15(5,d,e,f,g,h,a,b,c);
412 T1 = X[6] = W[6]; ROUND_00_15(6,c,d,e,f,g,h,a,b);
413 T1 = X[7] = W[7]; ROUND_00_15(7,b,c,d,e,f,g,h,a);
414 T1 = X[8] = W[8]; ROUND_00_15(8,a,b,c,d,e,f,g,h);
415 T1 = X[9] = W[9]; ROUND_00_15(9,h,a,b,c,d,e,f,g);
416 T1 = X[10] = W[10]; ROUND_00_15(10,g,h,a,b,c,d,e,f);
417 T1 = X[11] = W[11]; ROUND_00_15(11,f,g,h,a,b,c,d,e);
418 T1 = X[12] = W[12]; ROUND_00_15(12,e,f,g,h,a,b,c,d);
419 T1 = X[13] = W[13]; ROUND_00_15(13,d,e,f,g,h,a,b,c);
420 T1 = X[14] = W[14]; ROUND_00_15(14,c,d,e,f,g,h,a,b);
421 T1 = X[15] = W[15]; ROUND_00_15(15,b,c,d,e,f,g,h,a);
423 T1 = X[0] = PULL64(W[0]); ROUND_00_15(0,a,b,c,d,e,f,g,h);
424 T1 = X[1] = PULL64(W[1]); ROUND_00_15(1,h,a,b,c,d,e,f,g);
425 T1 = X[2] = PULL64(W[2]); ROUND_00_15(2,g,h,a,b,c,d,e,f);
426 T1 = X[3] = PULL64(W[3]); ROUND_00_15(3,f,g,h,a,b,c,d,e);
427 T1 = X[4] = PULL64(W[4]); ROUND_00_15(4,e,f,g,h,a,b,c,d);
428 T1 = X[5] = PULL64(W[5]); ROUND_00_15(5,d,e,f,g,h,a,b,c);
429 T1 = X[6] = PULL64(W[6]); ROUND_00_15(6,c,d,e,f,g,h,a,b);
430 T1 = X[7] = PULL64(W[7]); ROUND_00_15(7,b,c,d,e,f,g,h,a);
431 T1 = X[8] = PULL64(W[8]); ROUND_00_15(8,a,b,c,d,e,f,g,h);
432 T1 = X[9] = PULL64(W[9]); ROUND_00_15(9,h,a,b,c,d,e,f,g);
433 T1 = X[10] = PULL64(W[10]); ROUND_00_15(10,g,h,a,b,c,d,e,f);
434 T1 = X[11] = PULL64(W[11]); ROUND_00_15(11,f,g,h,a,b,c,d,e);
435 T1 = X[12] = PULL64(W[12]); ROUND_00_15(12,e,f,g,h,a,b,c,d);
436 T1 = X[13] = PULL64(W[13]); ROUND_00_15(13,d,e,f,g,h,a,b,c);
437 T1 = X[14] = PULL64(W[14]); ROUND_00_15(14,c,d,e,f,g,h,a,b);
438 T1 = X[15] = PULL64(W[15]); ROUND_00_15(15,b,c,d,e,f,g,h,a);
443 ROUND_16_80(i+0,a,b,c,d,e,f,g,h,X);
444 ROUND_16_80(i+1,h,a,b,c,d,e,f,g,X);
445 ROUND_16_80(i+2,g,h,a,b,c,d,e,f,X);
446 ROUND_16_80(i+3,f,g,h,a,b,c,d,e,X);
447 ROUND_16_80(i+4,e,f,g,h,a,b,c,d,X);
448 ROUND_16_80(i+5,d,e,f,g,h,a,b,c,X);
449 ROUND_16_80(i+6,c,d,e,f,g,h,a,b,X);
450 ROUND_16_80(i+7,b,c,d,e,f,g,h,a,X);
453 ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
454 ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;