X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fblake2%2Fblake2s.c;h=dc8fae4eb9080574a8af79be532a2fd75796f047;hp=eee615d9444ad4f215f98d3cc4afb674625549ac;hb=a1f82f06399f6c3cbee2009a498f4e7e620b2330;hpb=208527a75dd9584e2715c0eebcfad8c730d0dfae diff --git a/crypto/blake2/blake2s.c b/crypto/blake2/blake2s.c index eee615d944..dc8fae4eb9 100644 --- a/crypto/blake2/blake2s.c +++ b/crypto/blake2/blake2s.c @@ -2,7 +2,7 @@ * Copyright 2012, Samuel Neves . * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved. * - * Licensed under the OpenSSL licenses, (the "License"); + * Licensed under the OpenSSL licenses (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * https://www.openssl.org/source/license.html @@ -15,11 +15,12 @@ * can be found at https://blake2.net. */ +#include #include #include #include "e_os.h" -#include "internal/blake2_locl.h" +#include "blake2_locl.h" #include "blake2_impl.h" static const uint32_t blake2s_IV[8] = @@ -48,21 +49,13 @@ static ossl_inline void blake2s_set_lastblock(BLAKE2S_CTX *S) S->f[0] = -1; } -/* Increment the data hashed counter. */ -static ossl_inline void blake2s_increment_counter(BLAKE2S_CTX *S, - const uint32_t inc) -{ - S->t[0] += inc; - S->t[1] += (S->t[0] < inc); -} - /* Initialize the hashing state. */ static ossl_inline void blake2s_init0(BLAKE2S_CTX *S) { int i; memset(S, 0, sizeof(BLAKE2S_CTX)); - for(i = 0; i < 8; ++i) { + for (i = 0; i < 8; ++i) { S->h[i] = blake2s_IV[i]; } } @@ -70,16 +63,16 @@ static ossl_inline void blake2s_init0(BLAKE2S_CTX *S) /* init2 xors IV with input parameter block */ static void blake2s_init_param(BLAKE2S_CTX *S, const BLAKE2S_PARAM *P) { - const uint32_t *p = (const uint32_t *)(P); + const uint8_t *p = (const uint8_t *)(P); size_t i; /* The param struct is carefully hand packed, and should be 32 bytes on * every platform. 
*/ - OPENSSL_assert(sizeof(BLAKE2S_PARAM) == 32); + assert(sizeof(BLAKE2S_PARAM) == 32); blake2s_init0(S); /* IV XOR ParamBlock */ - for(i = 0; i < 8; ++i) { - S->h[i] ^= load32(&p[i]); + for (i = 0; i < 8; ++i) { + S->h[i] ^= load32(&p[i*4]); } } @@ -92,8 +85,8 @@ int BLAKE2s_Init(BLAKE2S_CTX *c) P->key_length = 0; P->fanout = 1; P->depth = 1; - store32(&P->leaf_length, 0); - store48(&P->node_offset, 0); + store32(P->leaf_length, 0); + store48(P->node_offset, 0); P->node_depth = 0; P->inner_length = 0; memset(P->salt, 0, sizeof(P->salt)); @@ -104,67 +97,104 @@ int BLAKE2s_Init(BLAKE2S_CTX *c) /* Permute the state while xoring in the block of data. */ static void blake2s_compress(BLAKE2S_CTX *S, - const uint8_t block[BLAKE2S_BLOCKBYTES]) + const uint8_t *blocks, + size_t len) { uint32_t m[16]; uint32_t v[16]; size_t i; + size_t increment; - for(i = 0; i < 16; ++i) { - m[i] = load32(block + i * sizeof(m[i])); - } + /* + * There are two distinct usage vectors for this function: + * + * a) BLAKE2s_Update uses it to process complete blocks, + * possibly more than one at a time; + * + * b) BLAKE2s_Final uses it to process last block, always + * single but possibly incomplete, in which case caller + * pads input with zeros. + */ + assert(len < BLAKE2S_BLOCKBYTES || len % BLAKE2S_BLOCKBYTES == 0); - for(i = 0; i < 8; ++i) { + /* + * Since last block is always processed with separate call, + * |len| not being multiple of complete blocks can be observed + * only with |len| being less than BLAKE2S_BLOCKBYTES ("less" + * including even zero), which is why following assignment doesn't + * have to reside inside the main loop below. + */ + increment = len < BLAKE2S_BLOCKBYTES ?
len : BLAKE2S_BLOCKBYTES; + + for (i = 0; i < 8; ++i) { v[i] = S->h[i]; } - v[ 8] = blake2s_IV[0]; - v[ 9] = blake2s_IV[1]; - v[10] = blake2s_IV[2]; - v[11] = blake2s_IV[3]; - v[12] = S->t[0] ^ blake2s_IV[4]; - v[13] = S->t[1] ^ blake2s_IV[5]; - v[14] = S->f[0] ^ blake2s_IV[6]; - v[15] = S->f[1] ^ blake2s_IV[7]; + do { + for (i = 0; i < 16; ++i) { + m[i] = load32(blocks + i * sizeof(m[i])); + } + + /* blake2s_increment_counter */ + S->t[0] += increment; + S->t[1] += (S->t[0] < increment); + + v[ 8] = blake2s_IV[0]; + v[ 9] = blake2s_IV[1]; + v[10] = blake2s_IV[2]; + v[11] = blake2s_IV[3]; + v[12] = S->t[0] ^ blake2s_IV[4]; + v[13] = S->t[1] ^ blake2s_IV[5]; + v[14] = S->f[0] ^ blake2s_IV[6]; + v[15] = S->f[1] ^ blake2s_IV[7]; #define G(r,i,a,b,c,d) \ - do { \ - a = a + b + m[blake2s_sigma[r][2*i+0]]; \ - d = rotr32(d ^ a, 16); \ - c = c + d; \ - b = rotr32(b ^ c, 12); \ - a = a + b + m[blake2s_sigma[r][2*i+1]]; \ - d = rotr32(d ^ a, 8); \ - c = c + d; \ - b = rotr32(b ^ c, 7); \ - } while(0) + do { \ + a = a + b + m[blake2s_sigma[r][2*i+0]]; \ + d = rotr32(d ^ a, 16); \ + c = c + d; \ + b = rotr32(b ^ c, 12); \ + a = a + b + m[blake2s_sigma[r][2*i+1]]; \ + d = rotr32(d ^ a, 8); \ + c = c + d; \ + b = rotr32(b ^ c, 7); \ + } while (0) #define ROUND(r) \ - do { \ - G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ - G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ - G(r,2,v[ 2],v[ 6],v[10],v[14]); \ - G(r,3,v[ 3],v[ 7],v[11],v[15]); \ - G(r,4,v[ 0],v[ 5],v[10],v[15]); \ - G(r,5,v[ 1],v[ 6],v[11],v[12]); \ - G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ - G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ - } while(0) - ROUND(0); - ROUND(1); - ROUND(2); - ROUND(3); - ROUND(4); - ROUND(5); - ROUND(6); - ROUND(7); - ROUND(8); - ROUND(9); - - for(i = 0; i < 8; ++i) { - S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; - } + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ 
+ G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while (0) +#if defined(OPENSSL_SMALL_FOOTPRINT) + /* almost 3x reduction on x86_64, 4.5x on ARMv8, 4x on ARMv4 */ + for (i = 0; i < 10; i++) { + ROUND(i); + } +#else + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + ROUND(4); + ROUND(5); + ROUND(6); + ROUND(7); + ROUND(8); + ROUND(9); +#endif + for (i = 0; i < 8; ++i) { + S->h[i] = v[i] ^= v[i + 8] ^ S->h[i]; + } #undef G #undef ROUND + blocks += increment; + len -= increment; + } while (len); } /* Absorb the input data into the hash state. Always returns 1. */ @@ -173,23 +203,42 @@ int BLAKE2s_Update(BLAKE2S_CTX *c, const void *data, size_t datalen) const uint8_t *in = data; size_t fill; - while(datalen > 0) { - fill = sizeof(c->buf) - c->buflen; - /* Must be >, not >=, so that last block can be hashed differently */ - if(datalen > fill) { + /* + * Intuitively one would expect intermediate buffer, c->buf, to + * store incomplete blocks. But in this case we are interested to + * temporarily stash even complete blocks, because last one in the + * stream has to be treated in special way, and at this point we + * don't know if last block in *this* call is last one "ever". This + * is the reason for why |datalen| is compared as >, and not >=. + */ + fill = sizeof(c->buf) - c->buflen; + if (datalen > fill) { + if (c->buflen) { memcpy(c->buf + c->buflen, in, fill); /* Fill buffer */ - blake2s_increment_counter(c, BLAKE2S_BLOCKBYTES); - blake2s_compress(c, c->buf); /* Compress */ + blake2s_compress(c, c->buf, BLAKE2S_BLOCKBYTES); c->buflen = 0; in += fill; datalen -= fill; - } else { /* datalen <= fill */ - memcpy(c->buf + c->buflen, in, datalen); - c->buflen += datalen; /* Be lazy, do not compress */ - return 1; + } + if (datalen > BLAKE2S_BLOCKBYTES) { + size_t stashlen = datalen % BLAKE2S_BLOCKBYTES; + /* + * If |datalen| is a multiple of the blocksize, stash + * last complete block, it can be final one... + */ + stashlen = stashlen ? 
stashlen : BLAKE2S_BLOCKBYTES; + datalen -= stashlen; + blake2s_compress(c, in, datalen); + in += datalen; + datalen = stashlen; } } + assert(datalen <= BLAKE2S_BLOCKBYTES); + + memcpy(c->buf + c->buflen, in, datalen); + c->buflen += datalen; /* Be lazy, do not compress */ + return 1; } @@ -201,14 +250,13 @@ int BLAKE2s_Final(unsigned char *md, BLAKE2S_CTX *c) { int i; - blake2s_increment_counter(c, (uint32_t)c->buflen); blake2s_set_lastblock(c); /* Padding */ memset(c->buf + c->buflen, 0, sizeof(c->buf) - c->buflen); - blake2s_compress(c, c->buf); + blake2s_compress(c, c->buf, c->buflen); /* Output full hash to temp buffer */ - for(i = 0; i < 8; ++i) { + for (i = 0; i < 8; ++i) { store32(md + sizeof(c->h[i]) * i, c->h[i]); }