From: Matt Caswell Date: Thu, 1 Feb 2018 13:53:56 +0000 (+0000) Subject: Remove the curve448 specific constant time implementation X-Git-Tag: OpenSSL_1_1_1-pre2~75 X-Git-Url: https://git.openssl.org/gitweb/?p=openssl.git;a=commitdiff_plain;h=f918504f91780225d8edc9ac0d4308e005b4d078 Remove the curve448 specific constant time implementation Instead we should use the standard OpenSSL constant time routines. Reviewed-by: Bernd Edlinger (Merged from https://github.com/openssl/openssl/pull/5105) --- diff --git a/crypto/ec/curve448/arch_32/arch_intrinsics.h b/crypto/ec/curve448/arch_32/arch_intrinsics.h index af574cb0a9..1f5d2d7751 100644 --- a/crypto/ec/curve448/arch_32/arch_intrinsics.h +++ b/crypto/ec/curve448/arch_32/arch_intrinsics.h @@ -10,16 +10,14 @@ * Originally written by Mike Hamburg */ +#include "internal/constant_time_locl.h" + #ifndef __ARCH_ARCH_32_ARCH_INTRINSICS_H__ # define __ARCH_ARCH_32_ARCH_INTRINSICS_H__ # define ARCH_WORD_BITS 32 -static ossl_inline uint32_t word_is_zero(uint32_t a) -{ - /* let's hope the compiler isn't clever enough to optimize this. */ - return (((uint64_t)a) - 1) >> 32; -} +#define word_is_zero(a) constant_time_is_zero_32(a) static ossl_inline uint64_t widemul(uint32_t a, uint32_t b) { diff --git a/crypto/ec/curve448/constant_time.h b/crypto/ec/curve448/constant_time.h deleted file mode 100644 index 61389a2b21..0000000000 --- a/crypto/ec/curve448/constant_time.h +++ /dev/null @@ -1,337 +0,0 @@ -/* - * Copyright 2017-2018 The OpenSSL Project Authors. All Rights Reserved. - * Copyright 2014 Cryptography Research, Inc. - * - * Licensed under the OpenSSL license (the "License"). You may not use - * this file except in compliance with the License. You can obtain a copy - * in the file LICENSE in the source distribution or at - * https://www.openssl.org/source/license.html - * - * Originally written by Mike Hamburg - */ - -#ifndef __CONSTANT_TIME_H__ -# define __CONSTANT_TIME_H__ 1 - -# include "word.h" -# include - -/* - * Constant-time operations on hopefully-compile-time-sized memory - * regions. Needed for flexibility / demagication: not all fields - * have sizes which are multiples of the vector width, necessitating - * a change from the Ed448 versions. - * - * These routines would be much simpler to define at the byte level, - * but if not vectorized they would be a significant fraction of the - * runtime. Eg on NEON-less ARM, constant_time_lookup is like 15% of - * signing time, vs 6% on Haswell with its fancy AVX2 vectors. - * - * If the compiler could do a good job of autovectorizing the code, - * we could just leave it with the byte definition. But that's unlikely - * on most deployed compilers, especially if you consider that pcmpeq[size] - * is much faster than moving a scalar to the vector unit (which is what - * a naive autovectorizer will do with constant_time_lookup on Intel). - * - * Instead, we're putting our trust in the loop unroller and unswitcher. - */ - -# if defined(__GNUC__) || defined(__clang__) -/* - * Unaligned big (vector?) register. - */ -typedef struct { - big_register_t unaligned; -} __attribute((packed)) unaligned_br_t; - -/* - * Unaligned word register, for architectures where that matters. - */ -typedef struct { - word_t unaligned; -} __attribute((packed)) unaligned_word_t; - -# define HAS_UNALIGNED_STRUCTS -# define RESTRICT __restrict__ -#else -# define RESTRICT -# endif - -/* - * Constant-time conditional swap. - * - * If doswap, then swap elem_bytes between *a and *b. - * - * *a and *b must not alias. Also, they must be at least as aligned - * as their sizes, if the CPU cares about that sort of thing. - */ -static ossl_inline void constant_time_cond_swap(void *RESTRICT a_, - void *RESTRICT b_, - word_t elem_bytes, - mask_t doswap) -{ - word_t k; - unsigned char *a = (unsigned char *)a_; - unsigned char *b = (unsigned char *)b_; - big_register_t br_mask = br_set_to_mask(doswap); -# ifndef HAS_UNALIGNED_STRUCTS - unsigned char doswapc = (unsigned char)(doswap & 0xFF); -# endif - - for (k = 0; k <= elem_bytes - sizeof(big_register_t); - k += sizeof(big_register_t)) { - if (elem_bytes % sizeof(big_register_t)) { - /* unaligned */ -# ifdef HAS_UNALIGNED_STRUCTS - big_register_t xor = ((unaligned_br_t *) (&a[k]))->unaligned - ^ ((unaligned_br_t *) (&b[k]))->unaligned; - - xor &= br_mask; - ((unaligned_br_t *)(&a[k]))->unaligned ^= xor; - ((unaligned_br_t *)(&b[k]))->unaligned ^= xor; -# else - size_t i; - - for (i = 0; i < sizeof(big_register_t); i++) { - unsigned char xor = a[k + i] ^ b[k + i]; - - xor &= doswapc; - a[k + i] ^= xor; - b[k + i] ^= xor; - } -# endif - } else { - /* aligned */ - big_register_t xor = *((big_register_t *) (&a[k])) - ^ *((big_register_t *) (&b[k])); - xor &= br_mask; - *((big_register_t *)(&a[k])) ^= xor; - *((big_register_t *)(&b[k])) ^= xor; - } - } - - if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { - for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) { - if (elem_bytes % sizeof(word_t)) { - /* unaligned */ -# ifdef HAS_UNALIGNED_STRUCTS - word_t xor = ((unaligned_word_t *)(&a[k]))->unaligned - ^ ((unaligned_word_t *)(&b[k]))->unaligned; - - xor &= doswap; - ((unaligned_word_t *)(&a[k]))->unaligned ^= xor; - ((unaligned_word_t *)(&b[k]))->unaligned ^= xor; -# else - size_t i; - - for (i = 0; i < sizeof(word_t); i++) { - unsigned char xor = a[k + i] ^ b[k + i]; - - xor &= doswapc; - a[k + i] ^= xor; - b[k + i] ^= xor; - } -# endif - } else { - /* aligned */ - word_t xor = *((word_t *) (&a[k])) ^ *((word_t *) (&b[k])); - xor &= doswap; - *((word_t *)(&a[k])) ^= xor; - *((word_t *)(&b[k])) ^= xor; - } - } - } - - if (elem_bytes % sizeof(word_t)) { - for (; k < elem_bytes; k += 1) { - unsigned char xor = a[k] ^ b[k]; - - xor &= doswap; - a[k] ^= xor; - b[k] ^= xor; - } - } -} - -/* - * Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes); - * - * The table must be at least as aligned as elem_bytes. The output must be word aligned, - * and if the input size is vector aligned it must also be vector aligned. - * - * The table and output must not alias. - */ -static ossl_inline void constant_time_lookup(void *RESTRICT out_, - const void *table_, - word_t elem_bytes, - word_t n_table, - word_t idx) -{ - big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx); - - /* Can't do pointer arithmetic on void * */ - unsigned char *out = (unsigned char *)out_; - const unsigned char *table = (const unsigned char *)table_; - word_t j, k; -# ifndef HAS_UNALIGNED_STRUCTS - unsigned char maskc; -# endif - - memset(out, 0, elem_bytes); - for (j = 0; j < n_table; j++, big_i -= big_one) { - big_register_t br_mask = br_is_zero(big_i); - word_t mask; - -# ifndef HAS_UNALIGNED_STRUCTS - maskc = (unsigned char)br_mask; -# endif - - for (k = 0; k <= elem_bytes - sizeof(big_register_t); - k += sizeof(big_register_t)) { - if (elem_bytes % sizeof(big_register_t)) { - /* unaligned */ -# ifdef HAS_UNALIGNED_STRUCTS - ((unaligned_br_t *)(out + k))->unaligned |= - br_mask - & ((const unaligned_br_t *) - (&table[k + j * elem_bytes]))->unaligned; -# else - size_t i; - - for (i = 0; i < sizeof(big_register_t); i++) - out[k + i] |= maskc - & ((unsigned char *) table) - [k + (j * elem_bytes) + i]; -# endif - } else { - /* aligned */ - *(big_register_t *)(out + k) |= - br_mask - & *(const big_register_t *)(&table[k + j * elem_bytes]); - } - } - - mask = word_is_zero(idx ^ j); -# ifndef HAS_UNALIGNED_STRUCTS - maskc = (unsigned char)mask; -# endif - if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { - for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) { - if (elem_bytes % sizeof(word_t)) { - /* input unaligned, output aligned */ -# ifdef HAS_UNALIGNED_STRUCTS - *(word_t *)(out + k) |= - mask - & ((const unaligned_word_t *) - (&table[k + j * elem_bytes]))->unaligned; -# else - size_t i; - - for (i = 0; i < sizeof(word_t); i++) - out[k + i] |= maskc - & ((unsigned char *)table) - [k + (j * elem_bytes) + i]; -# endif - } else { - /* aligned */ - *(word_t *)(out + k) |= - mask - & *(const word_t *)(&table[k + j * elem_bytes]); - } - } - } - - if (elem_bytes % sizeof(word_t)) { - for (; k < elem_bytes; k += 1) { - out[k] |= mask & table[k + j * elem_bytes]; - } - } - } -} - -/* - * Constant-time a = mask ? bTrue : bFalse. - * - * The input and output must be at least as aligned as alignment_bytes - * or their size, whichever is smaller. - * - * Note that the output is not __restrict__, but if it overlaps either - * input, it must be equal and not partially overlap. - */ -static ossl_inline void constant_time_select_c448(void *a_, - const void *bFalse_, - const void *bTrue_, - word_t elem_bytes, - mask_t mask, - size_t alignment_bytes) -{ - unsigned char *a = (unsigned char *)a_; - const unsigned char *bTrue = (const unsigned char *)bTrue_; - const unsigned char *bFalse = (const unsigned char *)bFalse_; - word_t k; - big_register_t br_mask = br_set_to_mask(mask); -# ifndef HAS_UNALIGNED_STRUCTS - unsigned char maskc = (unsigned char)mask; -# endif - - alignment_bytes |= elem_bytes; - - for (k = 0; k <= elem_bytes - sizeof(big_register_t); - k += sizeof(big_register_t)) { - if (alignment_bytes % sizeof(big_register_t)) { - /* unaligned */ -# ifdef HAS_UNALIGNED_STRUCTS - ((unaligned_br_t *)(&a[k]))->unaligned = - (br_mask & ((const unaligned_br_t *)(&bTrue[k]))->unaligned) - | (~br_mask - & ((const unaligned_br_t *)(&bFalse[k]))->unaligned); -# else - size_t i; - - for (i = 0; i < sizeof(big_register_t); i++) - a[k + i] = (maskc & ((unsigned char *)bTrue)[k + i]) - | (~maskc & ((unsigned char *)bFalse)[k + i]); -# endif - } else { - /* aligned */ - *(big_register_t *) (a + k) = - (br_mask & *(const big_register_t *)(&bTrue[k])) - | (~br_mask & *(const big_register_t *)(&bFalse[k])); - } - } - - if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { - for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) { - if (alignment_bytes % sizeof(word_t)) { - /* unaligned */ -# ifdef HAS_UNALIGNED_STRUCTS - ((unaligned_word_t *) (&a[k]))->unaligned = - (mask & ((const unaligned_word_t *)(&bTrue[k]))->unaligned) - | (~mask & - ((const unaligned_word_t *)(&bFalse[k]))->unaligned); -# else - size_t i; - - for (i = 0; i < sizeof(word_t); i++) - a[k + i] = (maskc & ((unsigned char *)bTrue)[k + i]) - | (~maskc & ((unsigned char *)bFalse)[k + i]); -# endif - } else { - /* aligned */ - *(word_t *) (a + k) = (mask & *(const word_t *)(&bTrue[k])) - | (~mask & *(const word_t *)(&bFalse[k])); - } - } - } - - if (elem_bytes % sizeof(word_t)) { - for (; k < elem_bytes; k += 1) { - a[k] = (mask & bTrue[k]) | (~mask & bFalse[k]); - } - } -} - -#undef RESTRICT -#undef HAS_UNALIGNED_STRUCTS - -#endif /* __CONSTANT_TIME_H__ */ diff --git a/crypto/ec/curve448/field.h b/crypto/ec/curve448/field.h index 2b7fc2aa95..5bc16bc2be 100644 --- a/crypto/ec/curve448/field.h +++ b/crypto/ec/curve448/field.h @@ -13,7 +13,7 @@ #ifndef __GF_H__ # define __GF_H__ -# include "constant_time.h" +# include "internal/constant_time_locl.h" # include # include # include "word.h" @@ -128,7 +128,20 @@ static ossl_inline void gf_mulw(gf c, const gf a, int32_t w) /* Constant time, x = is_z ? z : y */ static ossl_inline void gf_cond_sel(gf x, const gf y, const gf z, mask_t is_z) { - constant_time_select_c448(x, y, z, sizeof(gf), is_z, 0); + size_t i; + + for (i = 0; i < NLIMBS; i++) { +#if ARCH_WORD_BITS == 32 + x[0].limb[i] = constant_time_select_32((uint32_t)is_z, + (uint32_t)(z[0].limb[i]), + (uint32_t)(y[0].limb[i])); +#else + /* Must be 64 bit */ + x[0].limb[i] = constant_time_select_64((uint64_t)is_z, + (uint64_t)(z[0].limb[i]), + (uint64_t)(y[0].limb[i])); +#endif + } } /* Constant time, if (neg) x=-x; */ @@ -142,7 +155,18 @@ static ossl_inline void gf_cond_neg(gf x, mask_t neg) /* Constant time, if (swap) (x,y) = (y,x); */ static ossl_inline void gf_cond_swap(gf x, gf_s * RESTRICT y, mask_t swap) { - constant_time_cond_swap(x, y, sizeof(gf_s), swap); + size_t i; + + for (i = 0; i < NLIMBS; i++) { +#if ARCH_WORD_BITS == 32 + constant_time_cond_swap_32((uint32_t)swap, (uint32_t *)&(x[0].limb[i]), + (uint32_t *)&(y->limb[i])); +#else + /* Must be 64 bit */ + constant_time_cond_swap_64((uint64_t)swap, (uint64_t *)&(x[0].limb[i]), + (uint64_t *)&(y->limb[i])); +#endif + } } #endif /* __GF_H__ */ diff --git a/crypto/ec/curve448/scalar.c b/crypto/ec/curve448/scalar.c index 018582d259..db4841e6bf 100644 --- a/crypto/ec/curve448/scalar.c +++ b/crypto/ec/curve448/scalar.c @@ -12,7 +12,6 @@ #include #include "word.h" -#include "constant_time.h" #include "point_448.h" static const c448_word_t MONTGOMERY_FACTOR = (c448_word_t) 0x3bd440fae918bc5; diff --git a/crypto/ec/curve448/word.h b/crypto/ec/curve448/word.h index 95af8a3bcb..a180850df8 100644 --- a/crypto/ec/curve448/word.h +++ b/crypto/ec/curve448/word.h @@ -92,49 +92,21 @@ typedef word_t vecmask_t __attribute__ ((vector_size(32))); # if defined(__AVX2__) # define VECTOR_ALIGNED __attribute__((aligned(32))) -typedef uint32x8_t big_register_t; typedef uint64x4_t uint64xn_t; typedef uint32x8_t uint32xn_t; - -static ossl_inline big_register_t br_set_to_mask(mask_t x) -{ - uint32_t y = (uint32_t)x; - big_register_t ret = { y, y, y, y, y, y, y, y }; - return ret; -} # elif defined(__SSE2__) # define VECTOR_ALIGNED __attribute__((aligned(16))) -typedef uint32x4_t big_register_t; typedef uint64x2_t uint64xn_t; typedef uint32x4_t uint32xn_t; - -static ossl_inline big_register_t br_set_to_mask(mask_t x) -{ - uint32_t y = x; - big_register_t ret = { y, y, y, y }; - return ret; -} # elif defined(__ARM_NEON__) # define VECTOR_ALIGNED __attribute__((aligned(16))) -typedef uint32x4_t big_register_t; typedef uint64x2_t uint64xn_t; typedef uint32x4_t uint32xn_t; - -static ossl_inline big_register_t br_set_to_mask(mask_t x) -{ - return vdupq_n_u32(x); -} # elif !defined(_MSC_VER) \ && (defined(_WIN64) || defined(__amd64__) || defined(__X86_64__) \ || defined(__aarch64__)) # define VECTOR_ALIGNED __attribute__((aligned(8))) -typedef uint64_t big_register_t, uint64xn_t; - typedef uint32_t uint32xn_t; -static ossl_inline big_register_t br_set_to_mask(mask_t x) -{ - return (big_register_t) x; -} # else # ifdef __GNUC__ # define VECTOR_ALIGNED __attribute__((aligned(4))) @@ -147,32 +119,8 @@ static ossl_inline big_register_t br_set_to_mask(mask_t x) # endif typedef uint64_t uint64xn_t; typedef uint32_t uint32xn_t; -typedef uint32_t big_register_t; - -static ossl_inline big_register_t br_set_to_mask(mask_t x) -{ - return (big_register_t) x; -} # endif -# if defined(__AVX2__) -static ossl_inline big_register_t br_is_zero(big_register_t x) -{ - return (big_register_t) (x == br_set_to_mask(0)); -} -# elif defined(__SSE2__) -static ossl_inline big_register_t br_is_zero(big_register_t x) -{ - return (big_register_t) _mm_cmpeq_epi32((__m128i) x, _mm_setzero_si128()); -} -# elif defined(__ARM_NEON__) -static ossl_inline big_register_t br_is_zero(big_register_t x) -{ - return vceqq_u32(x, x ^ x); -} -# else -# define br_is_zero word_is_zero -# endif /* PERF: vectorize vs unroll */ # ifdef __clang__