X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=blobdiff_plain;f=crypto%2Fblake2%2Fblake2b.c;h=e77bd9ac16ffd9681af0b1e46c9d50132605fbcc;hp=23ad58359c5609b00e4f9a3549a69e92777e09a7;hb=042597b0ace7e3adc8a79f6e096eef659ce1d9d5;hpb=2d0b44126763f989a4cbffbffe9d0c7518158bb7

diff --git a/crypto/blake2/blake2b.c b/crypto/blake2/blake2b.c
index 23ad58359c..e77bd9ac16 100644
--- a/crypto/blake2/blake2b.c
+++ b/crypto/blake2/blake2b.c
@@ -1,35 +1,33 @@
 /*
- * BLAKE2 reference source code package - reference C implementations
+ * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
  *
- * Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.
- * You may use this under the terms of the CC0, the OpenSSL Licence, or the
- * Apache Public License 2.0, at your option.  The terms of these licenses can
- * be found at:
- *
- * - OpenSSL license   : https://www.openssl.org/source/license.html
- * - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
- * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- *
- * More information about the BLAKE2 hash function can be found at
- * https://blake2.net.
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
  */
 
-/* crypto/blake2/blake2b.c */
+/*
+ * Derived from the BLAKE2 reference implementation written by Samuel Neves.
+ * Copyright 2012, Samuel Neves <sneves@dei.uc.pt>
+ * More information about the BLAKE2 hash function and its implementations
+ * can be found at https://blake2.net.
+ */
 
-#include <stdint.h>
+#include <assert.h>
 #include <string.h>
-#include <stdio.h>
 #include <openssl/crypto.h>
+#include "e_os.h"
 
-#include "internal/blake2_locl.h"
+#include "blake2_locl.h"
 #include "blake2_impl.h"
 
 static const uint64_t blake2b_IV[8] =
 {
-    0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
-    0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
-    0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
-    0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
+    0x6a09e667f3bcc908U, 0xbb67ae8584caa73bU,
+    0x3c6ef372fe94f82bU, 0xa54ff53a5f1d36f1U,
+    0x510e527fade682d1U, 0x9b05688c2b3e6c1fU,
+    0x1f83d9abfb41bd6bU, 0x5be0cd19137e2179U
 };
 
 static const uint8_t blake2b_sigma[12][16] =
@@ -48,27 +46,19 @@ static const uint8_t blake2b_sigma[12][16] =
     { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
 };
 
-/* Some helper functions, not necessarily useful */
-static inline void blake2b_set_lastblock(BLAKE2B_CTX *S)
+/* Set that it's the last block we'll compress */
+static ossl_inline void blake2b_set_lastblock(BLAKE2B_CTX *S)
 {
     S->f[0] = -1;
 }
 
-/* Increment the data hashed couter. */
-static inline void blake2b_increment_counter(BLAKE2B_CTX *S,
-                                             const uint64_t inc)
-{
-    S->t[0] += inc;
-    S->t[1] += (S->t[0] < inc);
-}
-
 /* Initialize the hashing state. */
-static inline void blake2b_init0(BLAKE2B_CTX *S)
+static ossl_inline void blake2b_init0(BLAKE2B_CTX *S)
 {
     int i;
-    memset(S, 0, sizeof(BLAKE2B_CTX));
 
-    for(i = 0; i < 8; ++i) {
+    memset(S, 0, sizeof(BLAKE2B_CTX));
+    for (i = 0; i < 8; ++i) {
         S->h[i] = blake2b_IV[i];
     }
 }
@@ -82,9 +72,9 @@ static void blake2b_init_param(BLAKE2B_CTX *S, const BLAKE2B_PARAM *P)
 
     /* The param struct is carefully hand packed, and should be 64 bytes on
      * every platform. */
-    OPENSSL_assert(sizeof(BLAKE2B_PARAM) == 64);
+    assert(sizeof(BLAKE2B_PARAM) == 64);
     /* IV XOR ParamBlock */
-    for(i = 0; i < 8; ++i) {
+    for (i = 0; i < 8; ++i) {
         S->h[i] ^= load64(p + sizeof(S->h[i]) * i);
     }
 }
@@ -97,8 +87,8 @@ int BLAKE2b_Init(BLAKE2B_CTX *c)
     P->key_length    = 0;
     P->fanout        = 1;
     P->depth         = 1;
-    store32(&P->leaf_length, 0);
-    store64(&P->node_offset, 0);
+    store32(P->leaf_length, 0);
+    store64(P->node_offset, 0);
     P->node_depth    = 0;
     P->inner_length  = 0;
     memset(P->reserved, 0, sizeof(P->reserved));
@@ -110,69 +100,106 @@ int BLAKE2b_Init(BLAKE2B_CTX *c)
 
 /* Permute the state while xoring in the block of data. */
 static void blake2b_compress(BLAKE2B_CTX *S,
-                            const uint8_t block[BLAKE2B_BLOCKBYTES])
+                            const uint8_t *blocks,
+                            size_t len)
 {
     uint64_t m[16];
     uint64_t v[16];
     int i;
+    size_t increment;
 
-    for(i = 0; i < 16; ++i) {
-        m[i] = load64(block + i * sizeof(m[i]));
-    }
+    /*
+     * There are two distinct usage vectors for this function:
+     *
+     * a) BLAKE2b_Update uses it to process complete blocks,
+     *    possibly more than one at a time;
+     *
+     * b) BLAKE2b_Final uses it to process the last block, always
+     *    single but possibly incomplete, in which case the caller
+     *    pads the input with zeros.
+     */
+    assert(len < BLAKE2B_BLOCKBYTES || len % BLAKE2B_BLOCKBYTES == 0);
 
-    for(i = 0; i < 8; ++i) {
+    /*
+     * Since the last block is always processed with a separate call,
+     * |len| not being a multiple of the block size can be observed
+     * only with |len| being less than BLAKE2B_BLOCKBYTES ("less"
+     * including even zero), which is why the following assignment
+     * doesn't have to reside inside the main loop below.
+     */
+    increment = len < BLAKE2B_BLOCKBYTES ? len : BLAKE2B_BLOCKBYTES;
+
+    for (i = 0; i < 8; ++i) {
         v[i] = S->h[i];
     }
 
-    v[8]  = blake2b_IV[0];
-    v[9]  = blake2b_IV[1];
-    v[10] = blake2b_IV[2];
-    v[11] = blake2b_IV[3];
-    v[12] = S->t[0] ^ blake2b_IV[4];
-    v[13] = S->t[1] ^ blake2b_IV[5];
-    v[14] = S->f[0] ^ blake2b_IV[6];
-    v[15] = S->f[1] ^ blake2b_IV[7];
+    do {
+        for (i = 0; i < 16; ++i) {
+            m[i] = load64(blocks + i * sizeof(m[i]));
+        }
+
+        /* blake2b_increment_counter */
+        S->t[0] += increment;
+        S->t[1] += (S->t[0] < increment);
+
+        v[8]  = blake2b_IV[0];
+        v[9]  = blake2b_IV[1];
+        v[10] = blake2b_IV[2];
+        v[11] = blake2b_IV[3];
+        v[12] = S->t[0] ^ blake2b_IV[4];
+        v[13] = S->t[1] ^ blake2b_IV[5];
+        v[14] = S->f[0] ^ blake2b_IV[6];
+        v[15] = S->f[1] ^ blake2b_IV[7];
 #define G(r,i,a,b,c,d) \
-    do { \
-        a = a + b + m[blake2b_sigma[r][2*i+0]]; \
-        d = rotr64(d ^ a, 32); \
-        c = c + d; \
-        b = rotr64(b ^ c, 24); \
-        a = a + b + m[blake2b_sigma[r][2*i+1]]; \
-        d = rotr64(d ^ a, 16); \
-        c = c + d; \
-        b = rotr64(b ^ c, 63); \
-    } while(0)
+        do { \
+            a = a + b + m[blake2b_sigma[r][2*i+0]]; \
+            d = rotr64(d ^ a, 32); \
+            c = c + d; \
+            b = rotr64(b ^ c, 24); \
+            a = a + b + m[blake2b_sigma[r][2*i+1]]; \
+            d = rotr64(d ^ a, 16); \
+            c = c + d; \
+            b = rotr64(b ^ c, 63); \
+        } while (0)
 #define ROUND(r)  \
-    do { \
-        G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
-        G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
-        G(r,2,v[ 2],v[ 6],v[10],v[14]); \
-        G(r,3,v[ 3],v[ 7],v[11],v[15]); \
-        G(r,4,v[ 0],v[ 5],v[10],v[15]); \
-        G(r,5,v[ 1],v[ 6],v[11],v[12]); \
-        G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
-        G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
-    } while(0)
-    ROUND(0);
-    ROUND(1);
-    ROUND(2);
-    ROUND(3);
-    ROUND(4);
-    ROUND(5);
-    ROUND(6);
-    ROUND(7);
-    ROUND(8);
-    ROUND(9);
-    ROUND(10);
-    ROUND(11);
-
-    for(i = 0; i < 8; ++i) {
-        S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
-    }
+        do { \
+            G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
+            G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
+            G(r,2,v[ 2],v[ 6],v[10],v[14]); \
+            G(r,3,v[ 3],v[ 7],v[11],v[15]); \
+            G(r,4,v[ 0],v[ 5],v[10],v[15]); \
+            G(r,5,v[ 1],v[ 6],v[11],v[12]); \
+            G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
+            G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
+        } while (0)
+#if defined(OPENSSL_SMALL_FOOTPRINT)
+        /* 3x size reduction on x86_64, almost 7x on ARMv8, 9x on ARMv4 */
+        for (i = 0; i < 12; i++) {
+            ROUND(i);
+        }
+#else
+        ROUND(0);
+        ROUND(1);
+        ROUND(2);
+        ROUND(3);
+        ROUND(4);
+        ROUND(5);
+        ROUND(6);
+        ROUND(7);
+        ROUND(8);
+        ROUND(9);
+        ROUND(10);
+        ROUND(11);
+#endif
 
+        for (i = 0; i < 8; ++i) {
+            S->h[i] = v[i] ^= v[i + 8] ^ S->h[i];
+        }
 #undef G
 #undef ROUND
+        blocks += increment;
+        len -= increment;
+    } while (len);
 }
 
 /* Absorb the input data into the hash state.  Always returns 1. */
@@ -181,42 +208,60 @@ int BLAKE2b_Update(BLAKE2B_CTX *c, const void *data, size_t datalen)
     const uint8_t *in = data;
     size_t fill;
 
-    while(datalen > 0) {
-        fill = sizeof(c->buf) - c->buflen;
-        /* Must be >, not >=, so that last block can be hashed differently */
-        if(datalen > fill) {
+    /*
+     * Intuitively one would expect the intermediate buffer, c->buf, to
+     * store only incomplete blocks. Here, however, we also want to
+     * temporarily stash complete blocks, because the last one in the
+     * stream has to be treated in a special way, and at this point we
+     * don't know if the last block in *this* call is the last one "ever".
+     * This is why |datalen| is compared with >, and not >=.
+     */
+    fill = sizeof(c->buf) - c->buflen;
+    if (datalen > fill) {
+        if (c->buflen) {
             memcpy(c->buf + c->buflen, in, fill); /* Fill buffer */
-            blake2b_increment_counter(c, BLAKE2B_BLOCKBYTES);
-            blake2b_compress(c, c->buf); /* Compress */
+            blake2b_compress(c, c->buf, BLAKE2B_BLOCKBYTES);
             c->buflen = 0;
             in += fill;
             datalen -= fill;
-        } else { /* datalen <= fill */
-            memcpy(c->buf + c->buflen, in, datalen);
-            c->buflen += datalen; /* Be lazy, do not compress */
-            return 1;
+        }
+        if (datalen > BLAKE2B_BLOCKBYTES) {
+            size_t stashlen = datalen % BLAKE2B_BLOCKBYTES;
+            /*
+             * If |datalen| is a multiple of the blocksize, stash
+             * last complete block, it can be final one...
+             */
+            stashlen = stashlen ? stashlen : BLAKE2B_BLOCKBYTES;
+            datalen -= stashlen;
+            blake2b_compress(c, in, datalen);
+            in += datalen;
+            datalen = stashlen;
         }
     }
 
+    assert(datalen <= BLAKE2B_BLOCKBYTES);
+
+    memcpy(c->buf + c->buflen, in, datalen);
+    c->buflen += datalen; /* Be lazy, do not compress */
+
     return 1;
 }
 
 /*
- * Finalize the hash state in a way that avoids length extension attacks.
+ * Calculate the final hash and save it in md.
  * Always returns 1.
  */
 int BLAKE2b_Final(unsigned char *md, BLAKE2B_CTX *c)
 {
     int i;
 
-    blake2b_increment_counter(c, c->buflen);
     blake2b_set_lastblock(c);
     /* Padding */
     memset(c->buf + c->buflen, 0, sizeof(c->buf) - c->buflen);
-    blake2b_compress(c, c->buf);
+    blake2b_compress(c, c->buf, c->buflen);
 
     /* Output full hash to message digest */
-    for(i = 0; i < 8; ++i) {
+    for (i = 0; i < 8; ++i) {
         store64(md + sizeof(c->h[i]) * i, c->h[i]);
     }