/* crypto/sha/sha256.c */
/* ====================================================================
 * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
 * according to the OpenSSL license [found in ../../LICENSE].
 * ====================================================================
 */
#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA256)

#include <stdlib.h>
#include <string.h>

#include <openssl/opensslconf.h>
#include <openssl/crypto.h>
#include <openssl/sha.h>
#include <openssl/opensslv.h>

const char *SHA256_version="SHA-256" OPENSSL_VERSION_PTEXT;

int SHA224_Init (SHA256_CTX *c)
        {
        c->h[0]=0xc1059ed8UL;   c->h[1]=0x367cd507UL;
        c->h[2]=0x3070dd17UL;   c->h[3]=0xf70e5939UL;
        c->h[4]=0xffc00b31UL;   c->h[5]=0x68581511UL;
        c->h[6]=0x64f98fa7UL;   c->h[7]=0xbefa4fa4UL;
        c->Nl=0;        c->Nh=0;
        c->num=0;       c->md_len=SHA224_DIGEST_LENGTH;
        return 1;
        }

int SHA256_Init (SHA256_CTX *c)
        {
        c->h[0]=0x6a09e667UL;   c->h[1]=0xbb67ae85UL;
        c->h[2]=0x3c6ef372UL;   c->h[3]=0xa54ff53aUL;
        c->h[4]=0x510e527fUL;   c->h[5]=0x9b05688cUL;
        c->h[6]=0x1f83d9abUL;   c->h[7]=0x5be0cd19UL;
        c->Nl=0;        c->Nh=0;
        c->num=0;       c->md_len=SHA256_DIGEST_LENGTH;
        return 1;
        }

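/*
 * Illustrative aside: the SHA256_Init values above are the first 32 bits of
 * the fractional parts of the square roots of the first eight primes.  A
 * standalone spot check of h[0] (kept under #if 0, not compiled; assumes C99
 * <math.h>/<stdint.h> and IEEE double precision):
 */
#if 0
#include <math.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
        {
        double r = sqrt(2.0);           /* square root of the first prime */
        uint32_t h0 = (uint32_t)((r - floor(r)) * 4294967296.0); /* frac * 2^32 */

        printf("derived 0x%08x, table 0x6a09e667\n", (unsigned)h0);
        return 0;
        }
#endif
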
unsigned char *SHA224(const unsigned char *d, size_t n, unsigned char *md)
        {
        SHA256_CTX c;
        static unsigned char m[SHA224_DIGEST_LENGTH];

        if (md == NULL) md=m;
        SHA224_Init(&c);
        SHA256_Update(&c,d,n);
        SHA256_Final(md,&c);
        OPENSSL_cleanse(&c,sizeof(c));
        return(md);
        }

unsigned char *SHA256(const unsigned char *d, size_t n, unsigned char *md)
        {
        SHA256_CTX c;
        static unsigned char m[SHA256_DIGEST_LENGTH];

        if (md == NULL) md=m;
        SHA256_Init(&c);
        SHA256_Update(&c,d,n);
        SHA256_Final(md,&c);
        OPENSSL_cleanse(&c,sizeof(c));
        return(md);
        }

int SHA224_Update(SHA256_CTX *c, const void *data, size_t len)
{   return SHA256_Update (c,data,len);   }
int SHA224_Final (unsigned char *md, SHA256_CTX *c)
{   return SHA256_Final (md,c);   }

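/*
 * Illustrative aside: a minimal caller of the routines above, hashing the
 * FIPS 180-2 "abc" test message both with the one-shot SHA256() wrapper and
 * with the incremental Init/Update/Final interface (kept under #if 0, not
 * compiled; the expected digest starts with ba7816bf).
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <openssl/sha.h>

int main(void)
        {
        static const unsigned char msg[3] = {'a','b','c'};
        unsigned char one_shot[SHA256_DIGEST_LENGTH];
        unsigned char incremental[SHA256_DIGEST_LENGTH];
        SHA256_CTX ctx;
        int i;

        SHA256(msg, sizeof(msg), one_shot);             /* one-shot */

        SHA256_Init(&ctx);                              /* incremental, byte by byte */
        for (i = 0; i < (int)sizeof(msg); i++)
                SHA256_Update(&ctx, msg + i, 1);
        SHA256_Final(incremental, &ctx);

        for (i = 0; i < SHA256_DIGEST_LENGTH; i++)
                printf("%02x", one_shot[i]);
        printf("\n%s\n", memcmp(one_shot, incremental, sizeof(one_shot)) == 0
                         ? "one-shot and incremental digests agree" : "mismatch");
        return 0;
        }
#endif
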
#ifndef SHA_LONG_LOG2
#define SHA_LONG_LOG2   2       /* default to 32 bits */
#endif

#define DATA_ORDER_IS_BIG_ENDIAN

#define HASH_LONG               SHA_LONG
#define HASH_LONG_LOG2          SHA_LONG_LOG2
#define HASH_CTX                SHA256_CTX
#define HASH_CBLOCK             SHA_CBLOCK
#define HASH_LBLOCK             SHA_LBLOCK
/*
 * Note that FIPS 180-2 discusses "Truncation of the Hash Function Output."
 * The default: case below covers it. It's not clear, however, whether it's
 * permitted to truncate to a number of bytes not divisible by 4; most likely
 * not, but if it is, the default: case would have to be extended.
 * The idea behind the separate cases for the pre-defined lengths is to let
 * the compiler decide whether it's appropriate to unroll the small loops.
 */
#define HASH_MAKE_STRING(c,s)   do {    \
        unsigned long ll;               \
        unsigned int  n;                \
        switch ((c)->md_len)            \
        {   case SHA224_DIGEST_LENGTH:  \
                for (n=0;n<SHA224_DIGEST_LENGTH/4;n++)  \
                {   ll=(c)->h[n]; HOST_l2c(ll,(s));   } \
                break;                  \
            case SHA256_DIGEST_LENGTH:  \
                for (n=0;n<SHA256_DIGEST_LENGTH/4;n++)  \
                {   ll=(c)->h[n]; HOST_l2c(ll,(s));   } \
                break;                  \
            default:                    \
                if ((c)->md_len > SHA256_DIGEST_LENGTH) \
                    return 0;                           \
                for (n=0;n<(c)->md_len/4;n++)           \
                {   ll=(c)->h[n]; HOST_l2c(ll,(s));   } \
                break;                  \
        }                               \
        } while (0)
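
/*
 * Illustrative aside: HOST_l2c is supplied by md32_common.h; with
 * DATA_ORDER_IS_BIG_ENDIAN it stores a 32-bit word most-significant byte
 * first and advances the output pointer.  A plain-C equivalent of what the
 * macro above does for one state word might look like this hypothetical
 * helper (kept under #if 0, not compiled):
 */
#if 0
static unsigned char *store_be32(unsigned long l, unsigned char *s)
        {
        *s++ = (unsigned char)((l >> 24) & 0xff);
        *s++ = (unsigned char)((l >> 16) & 0xff);
        *s++ = (unsigned char)((l >>  8) & 0xff);
        *s++ = (unsigned char)( l        & 0xff);
        return s;       /* like HOST_l2c, leaves the pointer past the word */
        }
#endif
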
111
112 #define HASH_UPDATE             SHA256_Update
113 #define HASH_TRANSFORM          SHA256_Transform
114 #define HASH_FINAL              SHA256_Final
115 #define HASH_BLOCK_HOST_ORDER   sha256_block_host_order
116 #define HASH_BLOCK_DATA_ORDER   sha256_block_data_order
117 void sha256_block_host_order (SHA256_CTX *ctx, const void *in, size_t num);
118 void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num);
119
120 #include "md32_common.h"
121
122 #ifdef SHA256_ASM
123 void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host);
124 #else
125 static const SHA_LONG K256[64] = {
126         0x428a2f98UL,0x71374491UL,0xb5c0fbcfUL,0xe9b5dba5UL,
127         0x3956c25bUL,0x59f111f1UL,0x923f82a4UL,0xab1c5ed5UL,
128         0xd807aa98UL,0x12835b01UL,0x243185beUL,0x550c7dc3UL,
129         0x72be5d74UL,0x80deb1feUL,0x9bdc06a7UL,0xc19bf174UL,
130         0xe49b69c1UL,0xefbe4786UL,0x0fc19dc6UL,0x240ca1ccUL,
131         0x2de92c6fUL,0x4a7484aaUL,0x5cb0a9dcUL,0x76f988daUL,
132         0x983e5152UL,0xa831c66dUL,0xb00327c8UL,0xbf597fc7UL,
133         0xc6e00bf3UL,0xd5a79147UL,0x06ca6351UL,0x14292967UL,
134         0x27b70a85UL,0x2e1b2138UL,0x4d2c6dfcUL,0x53380d13UL,
135         0x650a7354UL,0x766a0abbUL,0x81c2c92eUL,0x92722c85UL,
136         0xa2bfe8a1UL,0xa81a664bUL,0xc24b8b70UL,0xc76c51a3UL,
137         0xd192e819UL,0xd6990624UL,0xf40e3585UL,0x106aa070UL,
138         0x19a4c116UL,0x1e376c08UL,0x2748774cUL,0x34b0bcb5UL,
139         0x391c0cb3UL,0x4ed8aa4aUL,0x5b9cca4fUL,0x682e6ff3UL,
140         0x748f82eeUL,0x78a5636fUL,0x84c87814UL,0x8cc70208UL,
141         0x90befffaUL,0xa4506cebUL,0xbef9a3f7UL,0xc67178f2UL };
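
/*
 * Illustrative aside: the K256 round constants are the first 32 bits of the
 * fractional parts of the cube roots of the first 64 primes.  A standalone
 * spot check of K256[0] (kept under #if 0, not compiled; assumes C99
 * <math.h>/<stdint.h> and IEEE double precision):
 */
#if 0
#include <math.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
        {
        double r = cbrt(2.0);           /* cube root of the first prime */
        uint32_t k0 = (uint32_t)((r - floor(r)) * 4294967296.0); /* frac * 2^32 */

        printf("derived 0x%08x, table 0x428a2f98\n", (unsigned)k0);
        return 0;
        }
#endif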

/*
 * The FIPS specification refers to right rotations, while our ROTATE macro
 * is a left rotation. This is why the rotation coefficients here are 32-N,
 * where N is the rotation amount given in the FIPS document.
 */
#define Sigma0(x)       (ROTATE((x),30) ^ ROTATE((x),19) ^ ROTATE((x),10))
#define Sigma1(x)       (ROTATE((x),26) ^ ROTATE((x),21) ^ ROTATE((x),7))
#define sigma0(x)       (ROTATE((x),25) ^ ROTATE((x),14) ^ ((x)>>3))
#define sigma1(x)       (ROTATE((x),15) ^ ROTATE((x),13) ^ ((x)>>10))

#define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
#define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
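
/*
 * Illustrative aside: a spot check that a FIPS right rotation by N really is
 * the left rotation by 32-N used above, written with plain C rotations (all
 * rotation counts here are in 1..31, so the shifts are well defined; kept
 * under #if 0, not compiled):
 */
#if 0
#include <stdint.h>
#include <assert.h>

static uint32_t rotl32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }
static uint32_t rotr32(uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }

static void Sigma0_spot_check(uint32_t x)
        {
        /* FIPS 180-2: Sigma0(x) = ROTR^2(x) ^ ROTR^13(x) ^ ROTR^22(x) */
        uint32_t fips = rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22);
        uint32_t here = rotl32(x, 30) ^ rotl32(x, 19) ^ rotl32(x, 10);

        assert(fips == here);
        }
#endif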

#ifdef OPENSSL_SMALL_FOOTPRINT

static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
        {
        unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1,T2;
        SHA_LONG        X[16];
        int i;
        const unsigned char *data=in;

                        while (num--) {

        a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
        e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];

        if (host)
                {
                const SHA_LONG *W=(const SHA_LONG *)data;

                for (i=0;i<16;i++)
                        {
                        T1 = X[i] = W[i];
                        T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
                        T2 = Sigma0(a) + Maj(a,b,c);
                        h = g;  g = f;  f = e;  e = d + T1;
                        d = c;  c = b;  b = a;  a = T1 + T2;
                        }
                }
        else
                {
                SHA_LONG l;

                for (i=0;i<16;i++)
                        {
                        HOST_c2l(data,l); T1 = X[i] = l;
                        T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
                        T2 = Sigma0(a) + Maj(a,b,c);
                        h = g;  g = f;  f = e;  e = d + T1;
                        d = c;  c = b;  b = a;  a = T1 + T2;
                        }
                }

        for (;i<64;i++)
                {
                s0 = X[(i+1)&0x0f];     s0 = sigma0(s0);
                s1 = X[(i+14)&0x0f];    s1 = sigma1(s1);

                T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
                T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
                T2 = Sigma0(a) + Maj(a,b,c);
                h = g;  g = f;  f = e;  e = d + T1;
                d = c;  c = b;  b = a;  a = T1 + T2;
                }

        ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
        ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;

                        data += SHA256_CBLOCK;
                        }
}

#else

#define ROUND_00_15(i,a,b,c,d,e,f,g,h)          do {    \
        T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];      \
        h = Sigma0(a) + Maj(a,b,c);                     \
        d += T1;        h += T1;                } while (0)

#define ROUND_16_63(i,a,b,c,d,e,f,g,h,X)        do {    \
        s0 = X[(i+1)&0x0f];     s0 = sigma0(s0);        \
        s1 = X[(i+14)&0x0f];    s1 = sigma1(s1);        \
        T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f];    \
        ROUND_00_15(i,a,b,c,d,e,f,g,h);         } while (0)

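/*
 * Illustrative aside: the X[(i+N)&0x0f] indexing used by both block
 * implementations keeps only the most recent 16 words of the FIPS 180-2
 * message schedule
 *
 *      W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16],  t = 16..63,
 *
 * in a circular buffer, while the unrolled rounds rotate the a..h argument
 * order instead of shuffling eight working variables.  A hypothetical
 * reference expansion over a full 64-word array (kept under #if 0, not
 * compiled):
 */
#if 0
static void sha256_expand_ref(SHA_LONG W[64])
        {
        int t;

        for (t = 16; t < 64; t++)
                W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16];
        }
#endif
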
static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
        {
        unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1;
        SHA_LONG        X[16];
        int i;
        const unsigned char *data=in;

                        while (num--) {

        a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
        e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];

        if (host)
                {
                const SHA_LONG *W=(const SHA_LONG *)data;

                T1 = X[0] = W[0];       ROUND_00_15(0,a,b,c,d,e,f,g,h);
                T1 = X[1] = W[1];       ROUND_00_15(1,h,a,b,c,d,e,f,g);
                T1 = X[2] = W[2];       ROUND_00_15(2,g,h,a,b,c,d,e,f);
                T1 = X[3] = W[3];       ROUND_00_15(3,f,g,h,a,b,c,d,e);
                T1 = X[4] = W[4];       ROUND_00_15(4,e,f,g,h,a,b,c,d);
                T1 = X[5] = W[5];       ROUND_00_15(5,d,e,f,g,h,a,b,c);
                T1 = X[6] = W[6];       ROUND_00_15(6,c,d,e,f,g,h,a,b);
                T1 = X[7] = W[7];       ROUND_00_15(7,b,c,d,e,f,g,h,a);
                T1 = X[8] = W[8];       ROUND_00_15(8,a,b,c,d,e,f,g,h);
                T1 = X[9] = W[9];       ROUND_00_15(9,h,a,b,c,d,e,f,g);
                T1 = X[10] = W[10];     ROUND_00_15(10,g,h,a,b,c,d,e,f);
                T1 = X[11] = W[11];     ROUND_00_15(11,f,g,h,a,b,c,d,e);
                T1 = X[12] = W[12];     ROUND_00_15(12,e,f,g,h,a,b,c,d);
                T1 = X[13] = W[13];     ROUND_00_15(13,d,e,f,g,h,a,b,c);
                T1 = X[14] = W[14];     ROUND_00_15(14,c,d,e,f,g,h,a,b);
                T1 = X[15] = W[15];     ROUND_00_15(15,b,c,d,e,f,g,h,a);
                }
        else
                {
                SHA_LONG l;

                HOST_c2l(data,l); T1 = X[0] = l;  ROUND_00_15(0,a,b,c,d,e,f,g,h);
                HOST_c2l(data,l); T1 = X[1] = l;  ROUND_00_15(1,h,a,b,c,d,e,f,g);
                HOST_c2l(data,l); T1 = X[2] = l;  ROUND_00_15(2,g,h,a,b,c,d,e,f);
                HOST_c2l(data,l); T1 = X[3] = l;  ROUND_00_15(3,f,g,h,a,b,c,d,e);
                HOST_c2l(data,l); T1 = X[4] = l;  ROUND_00_15(4,e,f,g,h,a,b,c,d);
                HOST_c2l(data,l); T1 = X[5] = l;  ROUND_00_15(5,d,e,f,g,h,a,b,c);
                HOST_c2l(data,l); T1 = X[6] = l;  ROUND_00_15(6,c,d,e,f,g,h,a,b);
                HOST_c2l(data,l); T1 = X[7] = l;  ROUND_00_15(7,b,c,d,e,f,g,h,a);
                HOST_c2l(data,l); T1 = X[8] = l;  ROUND_00_15(8,a,b,c,d,e,f,g,h);
                HOST_c2l(data,l); T1 = X[9] = l;  ROUND_00_15(9,h,a,b,c,d,e,f,g);
                HOST_c2l(data,l); T1 = X[10] = l; ROUND_00_15(10,g,h,a,b,c,d,e,f);
                HOST_c2l(data,l); T1 = X[11] = l; ROUND_00_15(11,f,g,h,a,b,c,d,e);
                HOST_c2l(data,l); T1 = X[12] = l; ROUND_00_15(12,e,f,g,h,a,b,c,d);
                HOST_c2l(data,l); T1 = X[13] = l; ROUND_00_15(13,d,e,f,g,h,a,b,c);
                HOST_c2l(data,l); T1 = X[14] = l; ROUND_00_15(14,c,d,e,f,g,h,a,b);
                HOST_c2l(data,l); T1 = X[15] = l; ROUND_00_15(15,b,c,d,e,f,g,h,a);
                }

        for (i=16;i<64;i+=8)
                {
                ROUND_16_63(i+0,a,b,c,d,e,f,g,h,X);
                ROUND_16_63(i+1,h,a,b,c,d,e,f,g,X);
                ROUND_16_63(i+2,g,h,a,b,c,d,e,f,X);
                ROUND_16_63(i+3,f,g,h,a,b,c,d,e,X);
                ROUND_16_63(i+4,e,f,g,h,a,b,c,d,X);
                ROUND_16_63(i+5,d,e,f,g,h,a,b,c,X);
                ROUND_16_63(i+6,c,d,e,f,g,h,a,b,X);
                ROUND_16_63(i+7,b,c,d,e,f,g,h,a,X);
                }

        ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
        ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;

                        data += SHA256_CBLOCK;
                        }
        }

#endif
#endif /* SHA256_ASM */

/*
 * The idea is to trade a couple of cycles for some space: both entry points
 * below share one block function parameterized by a "host byte order" flag.
 * On IA-32 this saves about 4K in the "big footprint" case; in the "small
 * footprint" case any gain is appreciated:-)
 */
void HASH_BLOCK_HOST_ORDER (SHA256_CTX *ctx, const void *in, size_t num)
{   sha256_block (ctx,in,num,1);   }

void HASH_BLOCK_DATA_ORDER (SHA256_CTX *ctx, const void *in, size_t num)
{   sha256_block (ctx,in,num,0);   }
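
/*
 * The generic SHA256_Update, SHA256_Final and SHA256_Transform bodies are
 * generated by md32_common.h (included above) in terms of these two entry
 * points: roughly speaking, the "host order" variant is used when the input
 * words are already in host byte order, while the "data order" variant reads
 * its input as a big-endian byte stream.
 */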

#endif /* OPENSSL_NO_SHA256 */