From: Matt Caswell Date: Tue, 12 Dec 2017 14:17:40 +0000 (+0000) Subject: Fix build errors for Curve448 code on Windows (VC-WIN32 and VC-WIN64A) X-Git-Tag: OpenSSL_1_1_1-pre2~88 X-Git-Url: https://git.openssl.org/?p=openssl.git;a=commitdiff_plain;h=52a9587c78a135ff200b8c92f8aad7ea1bd4de75 Fix build errors for Curve448 code on Windows (VC-WIN32 and VC-WIN64A) Reviewed-by: Bernd Edlinger (Merged from https://github.com/openssl/openssl/pull/5105) --- diff --git a/crypto/ec/curve448/arch_32/arch_intrinsics.h b/crypto/ec/curve448/arch_32/arch_intrinsics.h index 33439822fe..22fd8bd0f4 100644 --- a/crypto/ec/curve448/arch_32/arch_intrinsics.h +++ b/crypto/ec/curve448/arch_32/arch_intrinsics.h @@ -15,15 +15,13 @@ # define ARCH_WORD_BITS 32 -static __inline__ __attribute((always_inline, unused)) -uint32_t word_is_zero(uint32_t a) +static ossl_inline uint32_t word_is_zero(uint32_t a) { /* let's hope the compiler isn't clever enough to optimize this. */ return (((uint64_t)a) - 1) >> 32; } -static __inline__ __attribute((always_inline, unused)) -uint64_t widemul(uint32_t a, uint32_t b) +static ossl_inline uint64_t widemul(uint32_t a, uint32_t b) { return ((uint64_t)a) * b; } diff --git a/crypto/ec/curve448/arch_32/f_impl.c b/crypto/ec/curve448/arch_32/f_impl.c index ef01e49a66..d1825cc60f 100644 --- a/crypto/ec/curve448/arch_32/f_impl.c +++ b/crypto/ec/curve448/arch_32/f_impl.c @@ -20,7 +20,7 @@ # define FOR_LIMB(_i,_start,_end,_x) do { for (_i=_start; _i<_end; _i++) _x; } while (0) #endif -void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs) +void gf_mul(gf_s * RESTRICT cs, const gf as, const gf bs) { const uint32_t *a = as->limb, *b = bs->limb; uint32_t *c = cs->limb; @@ -71,7 +71,7 @@ void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs) c[1] += ((uint32_t)(accum1)); } -void gf_mulw_unsigned(gf_s * __restrict__ cs, const gf as, uint32_t b) +void gf_mulw_unsigned(gf_s * RESTRICT cs, const gf as, uint32_t b) { const uint32_t *a = as->limb; uint32_t *c = cs->limb; @@ -88,15 +88,15 @@ void gf_mulw_unsigned(gf_s * __restrict__ cs, const gf as, uint32_t b) }); accum0 += accum8 + c[8]; - c[8] = accum0 & mask; - c[9] += accum0 >> 28; + c[8] = ((uint32_t)accum0) & mask; + c[9] += (uint32_t)(accum0 >> 28); accum8 += c[0]; - c[0] = accum8 & mask; - c[1] += accum8 >> 28; + c[0] = ((uint32_t)accum8) & mask; + c[1] += (uint32_t)(accum8 >> 28); } -void gf_sqr(gf_s * __restrict__ cs, const gf as) +void gf_sqr(gf_s * RESTRICT cs, const gf as) { gf_mul(cs, as, as); /* Performs better with a dedicated square */ } diff --git a/crypto/ec/curve448/constant_time.h b/crypto/ec/curve448/constant_time.h index 3f02694e37..a75fcedd75 100644 --- a/crypto/ec/curve448/constant_time.h +++ b/crypto/ec/curve448/constant_time.h @@ -36,19 +36,26 @@ * Instead, we're putting our trust in the loop unroller and unswitcher. */ +# if defined(__GNUC__) || defined(__clang__) /* * Unaligned big (vector?) register. */ typedef struct { big_register_t unaligned; -} __attribute__ ((packed)) unaligned_br_t; +} __attribute((packed)) unaligned_br_t; /* * Unaligned word register, for architectures where that matters. */ typedef struct { word_t unaligned; -} __attribute__ ((packed)) unaligned_word_t; +} __attribute((packed)) unaligned_word_t; + +# define HAS_UNALIGNED_STRUCTS +# define RESTRICT __restrict__ +#else +# define RESTRICT +# endif /* * Constant-time conditional swap. @@ -58,26 +65,41 @@ typedef struct { * *a and *b must not alias. Also, they must be at least as aligned * as their sizes, if the CPU cares about that sort of thing. */ -static ossl_inline void constant_time_cond_swap(void *__restrict__ a_, - void *__restrict__ b_, +static ossl_inline void constant_time_cond_swap(void *RESTRICT a_, + void *RESTRICT b_, word_t elem_bytes, mask_t doswap) { word_t k; unsigned char *a = (unsigned char *)a_; unsigned char *b = (unsigned char *)b_; - big_register_t br_mask = br_set_to_mask(doswap); +# ifndef HAS_UNALIGNED_STRUCTS + unsigned char doswapc = (unsigned char)(doswap & 0xFF); +# endif + for (k = 0; k <= elem_bytes - sizeof(big_register_t); k += sizeof(big_register_t)) { if (elem_bytes % sizeof(big_register_t)) { /* unaligned */ +# ifdef HAS_UNALIGNED_STRUCTS big_register_t xor = ((unaligned_br_t *) (&a[k]))->unaligned ^ ((unaligned_br_t *) (&b[k]))->unaligned; xor &= br_mask; ((unaligned_br_t *)(&a[k]))->unaligned ^= xor; ((unaligned_br_t *)(&b[k]))->unaligned ^= xor; +# else + size_t i; + + for (i = 0; i < sizeof(big_register_t); i++) { + unsigned char xor = a[k + i] ^ b[k + i]; + + xor &= doswapc; + a[k + i] ^= xor; + b[k + i] ^= xor; + } +# endif } else { /* aligned */ big_register_t xor = *((big_register_t *) (&a[k])) @@ -92,12 +114,24 @@ static ossl_inline void constant_time_cond_swap(void *__restrict__ a_, for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) { if (elem_bytes % sizeof(word_t)) { /* unaligned */ +# ifdef HAS_UNALIGNED_STRUCTS word_t xor = ((unaligned_word_t *)(&a[k]))->unaligned ^ ((unaligned_word_t *)(&b[k]))->unaligned; xor &= doswap; ((unaligned_word_t *)(&a[k]))->unaligned ^= xor; ((unaligned_word_t *)(&b[k]))->unaligned ^= xor; +# else + size_t i; + + for (i = 0; i < sizeof(word_t); i++) { + unsigned char xor = a[k + i] ^ b[k + i]; + + xor &= doswapc; + a[k + i] ^= xor; + b[k + i] ^= xor; + } +# endif } else { /* aligned */ word_t xor = *((word_t *) (&a[k])) ^ *((word_t *) (&b[k])); @@ -127,7 +161,7 @@ static ossl_inline void constant_time_cond_swap(void *__restrict__ a_, * * The table and output must not alias. */ -static ossl_inline void constant_time_lookup(void *__restrict__ out_, +static ossl_inline void constant_time_lookup(void *RESTRICT out_, const void *table_, word_t elem_bytes, word_t n_table, @@ -139,20 +173,36 @@ static ossl_inline void constant_time_lookup(void *__restrict__ out_, unsigned char *out = (unsigned char *)out_; const unsigned char *table = (const unsigned char *)table_; word_t j, k; +# ifndef HAS_UNALIGNED_STRUCTS + unsigned char maskc; +# endif memset(out, 0, elem_bytes); for (j = 0; j < n_table; j++, big_i -= big_one) { big_register_t br_mask = br_is_zero(big_i); word_t mask; +# ifndef HAS_UNALIGNED_STRUCTS + maskc = (unsigned char)br_mask; +# endif + for (k = 0; k <= elem_bytes - sizeof(big_register_t); k += sizeof(big_register_t)) { if (elem_bytes % sizeof(big_register_t)) { /* unaligned */ +# ifdef HAS_UNALIGNED_STRUCTS ((unaligned_br_t *)(out + k))->unaligned |= br_mask & ((const unaligned_br_t *) (&table[k + j * elem_bytes]))->unaligned; +# else + size_t i; + + for (i = 0; i < sizeof(big_register_t); i++) + out[k + i] |= maskc + & ((unsigned char *) table) + [k + (j * elem_bytes) + i]; +# endif } else { /* aligned */ *(big_register_t *)(out + k) |= @@ -162,14 +212,26 @@ static ossl_inline void constant_time_lookup(void *__restrict__ out_, } mask = word_is_zero(idx ^ j); +# ifndef HAS_UNALIGNED_STRUCTS + maskc = (unsigned char)mask; +# endif if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) { if (elem_bytes % sizeof(word_t)) { /* input unaligned, output aligned */ +# ifdef HAS_UNALIGNED_STRUCTS *(word_t *)(out + k) |= mask & ((const unaligned_word_t *) (&table[k + j * elem_bytes]))->unaligned; +# else + size_t i; + + for (i = 0; i < sizeof(word_t); i++) + out[k + i] |= maskc + & ((unsigned char *)table) + [k + (j * elem_bytes) + i]; +# endif } else { /* aligned */ *(word_t *)(out + k) |= @@ -208,6 +270,9 @@ static ossl_inline void constant_time_select(void *a_, const unsigned char *bFalse = (const unsigned char *)bFalse_; word_t k; big_register_t br_mask = br_set_to_mask(mask); +# ifndef HAS_UNALIGNED_STRUCTS + unsigned char maskc = (unsigned char)mask; +# endif alignment_bytes |= elem_bytes; @@ -215,10 +280,18 @@ static ossl_inline void constant_time_select(void *a_, k += sizeof(big_register_t)) { if (alignment_bytes % sizeof(big_register_t)) { /* unaligned */ +# ifdef HAS_UNALIGNED_STRUCTS ((unaligned_br_t *)(&a[k]))->unaligned = (br_mask & ((const unaligned_br_t *)(&bTrue[k]))->unaligned) | (~br_mask & ((const unaligned_br_t *)(&bFalse[k]))->unaligned); +# else + size_t i; + + for (i = 0; i < sizeof(big_register_t); i++) + a[k + i] = (maskc & ((unsigned char *)bTrue)[k + i]) + | (~maskc & ((unsigned char *)bFalse)[k + i]); +# endif } else { /* aligned */ *(big_register_t *) (a + k) = @@ -231,10 +304,18 @@ static ossl_inline void constant_time_select(void *a_, for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) { if (alignment_bytes % sizeof(word_t)) { /* unaligned */ +# ifdef HAS_UNALIGNED_STRUCTS ((unaligned_word_t *) (&a[k]))->unaligned = (mask & ((const unaligned_word_t *)(&bTrue[k]))->unaligned) | (~mask & ((const unaligned_word_t *)(&bFalse[k]))->unaligned); +# else + size_t i; + + for (i = 0; i < sizeof(word_t); i++) + a[k + i] = (maskc & ((unsigned char *)bTrue)[k + i]) + | (~maskc & ((unsigned char *)bFalse)[k + i]); +# endif } else { /* aligned */ *(word_t *) (a + k) = (mask & *(const word_t *)(&bTrue[k])) @@ -250,4 +331,7 @@ static ossl_inline void constant_time_select(void *a_, } } +#undef RESTRICT +#undef HAS_UNALIGNED_STRUCTS + #endif /* __CONSTANT_TIME_H__ */ diff --git a/crypto/ec/curve448/curve448.c b/crypto/ec/curve448/curve448.c index 920705bfcd..cd224b0cdc 100644 --- a/crypto/ec/curve448/curve448.c +++ b/crypto/ec/curve448/curve448.c @@ -237,7 +237,7 @@ c448_bool_t curve448_point_valid(const curve448_point_t p) return mask_to_bool(out); } -static ossl_inline void constant_time_lookup_niels(niels_s * __restrict__ ni, +static ossl_inline void constant_time_lookup_niels(niels_s * RESTRICT ni, const niels_t * table, int nelts, int idx) { @@ -425,7 +425,7 @@ c448_error_t x448_int(uint8_t out[X_PUBLIC_BYTES], sb = -1; k_t = (sb >> (t % 8)) & 1; - k_t = -k_t; /* set to all 0s or all 1s */ + k_t = 0 - k_t; /* set to all 0s or all 1s */ swap ^= k_t; gf_cond_swap(x2, x3, swap); @@ -498,7 +498,7 @@ void x448_derive_public_key(uint8_t out[X_PUBLIC_BYTES], memcpy(scalar2, scalar, sizeof(scalar2)); scalar2[0] &= -(uint8_t)COFACTOR; - scalar2[X_PRIVATE_BYTES - 1] &= ~(-1u << ((X_PRIVATE_BITS + 7) % 8)); + scalar2[X_PRIVATE_BYTES - 1] &= ~((0u - 1u) << ((X_PRIVATE_BITS + 7) % 8)); scalar2[X_PRIVATE_BYTES - 1] |= 1 << ((X_PRIVATE_BITS + 7) % 8); curve448_scalar_decode_long(the_scalar, scalar2, sizeof(scalar2)); @@ -517,6 +517,45 @@ struct smvt_control { int power, addend; }; +#if defined(__GNUC__) || defined(__clang__) +# define NUMTRAILINGZEROS __builtin_ctz +#else +# define NUMTRAILINGZEROS numtrailingzeros +static uint32_t numtrailingzeros(uint32_t i) +{ + unsigned int tmp; + uint32_t num = 31; + + if (i == 0) + return 32; + + tmp = i << 16; + if (tmp != 0) { + i = tmp; + num -= 16; + } + tmp = i << 8; + if (tmp != 0) { + i = tmp; + num -= 8; + } + tmp = i << 4; + if (tmp != 0) { + i = tmp; + num -= 4; + } + tmp = i << 2; + if (tmp != 0) { + i = tmp; + num -= 2; + } + if ((i << 1) != 0) + num--; + + return num; +} +#endif + static int recode_wnaf(struct smvt_control *control, /* [nbits/(table_bits + 1) + 3] */ const curve448_scalar_t scalar, @@ -549,7 +588,7 @@ static int recode_wnaf(struct smvt_control *control, } while (current & 0xFFFF) { - uint32_t pos = __builtin_ctz((uint32_t)current); + uint32_t pos = NUMTRAILINGZEROS((uint32_t)current); uint32_t odd = (uint32_t)current >> pos; int32_t delta = odd & mask; diff --git a/crypto/ec/curve448/curve448utils.h b/crypto/ec/curve448/curve448utils.h index 10902e7bed..c92ce2eab8 100644 --- a/crypto/ec/curve448/curve448utils.h +++ b/crypto/ec/curve448/curve448utils.h @@ -62,7 +62,7 @@ typedef int64_t c448_dsword_t; # endif /* C448_TRUE = -1 so that C448_TRUE & x = x */ -static const c448_bool_t C448_TRUE = -(c448_bool_t) 1; +static const c448_bool_t C448_TRUE = 0 - (c448_bool_t)1; /* C448_FALSE = 0 so that C448_FALSE & x = 0 */ static const c448_bool_t C448_FALSE = 0; diff --git a/crypto/ec/curve448/eddsa.c b/crypto/ec/curve448/eddsa.c index aa182cdbe7..5051d646fd 100644 --- a/crypto/ec/curve448/eddsa.c +++ b/crypto/ec/curve448/eddsa.c @@ -150,27 +150,28 @@ c448_error_t c448_ed448_sign( return C448_FAILURE; { - /* Schedule the secret key */ - struct { - uint8_t secret_scalar_ser[EDDSA_448_PRIVATE_BYTES]; - uint8_t seed[EDDSA_448_PRIVATE_BYTES]; - } __attribute__ ((packed)) expanded; + /* + * Schedule the secret key, First EDDSA_448_PRIVATE_BYTES is serialised + * secret scalar,next EDDSA_448_PRIVATE_BYTES bytes is the seed. + */ + uint8_t expanded[EDDSA_448_PRIVATE_BYTES * 2]; - if (!oneshot_hash((uint8_t *)&expanded, sizeof(expanded), privkey, + if (!oneshot_hash(expanded, sizeof(expanded), privkey, EDDSA_448_PRIVATE_BYTES)) goto err; - clamp(expanded.secret_scalar_ser); - curve448_scalar_decode_long(secret_scalar, expanded.secret_scalar_ser, - sizeof(expanded.secret_scalar_ser)); + clamp(expanded); + curve448_scalar_decode_long(secret_scalar, expanded, + EDDSA_448_PRIVATE_BYTES); /* Hash to create the nonce */ if (!hash_init_with_dom(hashctx, prehashed, 0, context, context_len) - || !EVP_DigestUpdate(hashctx, expanded.seed, sizeof(expanded.seed)) + || !EVP_DigestUpdate(hashctx, expanded + EDDSA_448_PRIVATE_BYTES, + EDDSA_448_PRIVATE_BYTES) || !EVP_DigestUpdate(hashctx, message, message_len)) { - OPENSSL_cleanse(&expanded, sizeof(expanded)); + OPENSSL_cleanse(expanded, sizeof(expanded)); goto err; } - OPENSSL_cleanse(&expanded, sizeof(expanded)); + OPENSSL_cleanse(expanded, sizeof(expanded)); } /* Decode the nonce */ diff --git a/crypto/ec/curve448/f_generic.c b/crypto/ec/curve448/f_generic.c index 6a3442231e..2fa97e5aa5 100644 --- a/crypto/ec/curve448/f_generic.c +++ b/crypto/ec/curve448/f_generic.c @@ -22,7 +22,7 @@ void gf_serialize(uint8_t serial[SER_BYTES], const gf x, int with_hibit) { unsigned int j = 0, fill = 0; dword_t buffer = 0; - unsigned int i; + int i; gf red; gf_copy(red, x); @@ -36,7 +36,7 @@ void gf_serialize(uint8_t serial[SER_BYTES], const gf x, int with_hibit) fill += LIMB_PLACE_VALUE(LIMBPERM(j)); j++; } - serial[i] = buffer; + serial[i] = (uint8_t)buffer; fill -= 8; buffer >>= 8; } @@ -48,7 +48,7 @@ mask_t gf_hibit(const gf x) gf y; gf_add(y, x, x); gf_strong_reduce(y); - return -(y->limb[0] & 1); + return 0 - (y->limb[0] & 1); } /* Return high bit of x = low bit of 2x mod p */ @@ -57,7 +57,7 @@ mask_t gf_lobit(const gf x) gf y; gf_copy(y, x); gf_strong_reduce(y); - return -(y->limb[0] & 1); + return 0 - (y->limb[0] & 1); } /* Deserialize from wire format; return -1 on success and 0 on failure. */ @@ -80,16 +80,16 @@ mask_t gf_deserialize(gf x, const uint8_t serial[SER_BYTES], int with_hibit, fill += 8; j++; } - x->limb[LIMBPERM(i)] = - (i < NLIMBS - 1) ? buffer & LIMB_MASK(LIMBPERM(i)) : buffer; + x->limb[LIMBPERM(i)] = (word_t) + ((i < NLIMBS - 1) ? buffer & LIMB_MASK(LIMBPERM(i)) : buffer); fill -= LIMB_PLACE_VALUE(LIMBPERM(i)); buffer >>= LIMB_PLACE_VALUE(LIMBPERM(i)); scarry = (scarry + x->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)]) >> (8 * sizeof(word_t)); } - succ = with_hibit ? -(mask_t) 1 : ~gf_hibit(x); - return succ & word_is_zero(buffer) & ~word_is_zero(scarry); + succ = with_hibit ? 0 - (mask_t) 1 : ~gf_hibit(x); + return succ & word_is_zero((word_t)buffer) & ~word_is_zero((word_t)scarry); } /* Reduce to canonical form. */ @@ -120,7 +120,7 @@ void gf_strong_reduce(gf a) */ assert(word_is_zero(scarry) | word_is_zero(scarry + 1)); - scarry_0 = scarry; + scarry_0 = (word_t)scarry; /* add it back */ for (i = 0; i < NLIMBS; i++) { diff --git a/crypto/ec/curve448/field.h b/crypto/ec/curve448/field.h index e66447e485..5cf7d62e1c 100644 --- a/crypto/ec/curve448/field.h +++ b/crypto/ec/curve448/field.h @@ -21,17 +21,26 @@ # define NLIMBS (64/sizeof(word_t)) # define X_SER_BYTES 56 # define SER_BYTES 56 + +# if defined(__GNUC__) || defined(__clang__) +# define INLINE_UNUSED __inline__ __attribute__((unused,always_inline)) +# define RESTRICT __restrict__ +# define ALIGNED __attribute__((aligned(32))) +# else +# define INLINE_UNUSED ossl_inline +# define RESTRICT +# define ALIGNED +# endif + typedef struct gf_s { word_t limb[NLIMBS]; -} __attribute__ ((aligned(32))) gf_s, gf[1]; +} ALIGNED gf_s, gf[1]; /* RFC 7748 support */ # define X_PUBLIC_BYTES X_SER_BYTES # define X_PRIVATE_BYTES X_PUBLIC_BYTES # define X_PRIVATE_BITS 448 -# define INLINE_UNUSED __inline__ __attribute__((unused,always_inline)) - static INLINE_UNUSED void gf_copy(gf out, const gf a) { *out = *a; @@ -45,9 +54,9 @@ static INLINE_UNUSED void gf_weak_reduce(gf inout); void gf_strong_reduce(gf inout); void gf_add(gf out, const gf a, const gf b); void gf_sub(gf out, const gf a, const gf b); -void gf_mul(gf_s * __restrict__ out, const gf a, const gf b); -void gf_mulw_unsigned(gf_s * __restrict__ out, const gf a, uint32_t b); -void gf_sqr(gf_s * __restrict__ out, const gf a); +void gf_mul(gf_s * RESTRICT out, const gf a, const gf b); +void gf_mulw_unsigned(gf_s * RESTRICT out, const gf a, uint32_t b); +void gf_sqr(gf_s * RESTRICT out, const gf a); mask_t gf_isr(gf a, const gf x); /** a^2 x = 1, QNR, or 0 if x=0. Return true if successful */ mask_t gf_eq(const gf x, const gf y); mask_t gf_lobit(const gf x); @@ -67,7 +76,7 @@ mask_t gf_deserialize(gf x, const uint8_t serial[SER_BYTES], int with_hibit, static const gf ZERO = {{{0}}}, ONE = {{{1}}}; /* Square x, n times. */ -static ossl_inline void gf_sqrn(gf_s * __restrict__ y, const gf x, int n) +static ossl_inline void gf_sqrn(gf_s * RESTRICT y, const gf x, int n) { gf tmp; assert(n > 0); @@ -131,7 +140,7 @@ static ossl_inline void gf_cond_neg(gf x, mask_t neg) } /* Constant time, if (swap) (x,y) = (y,x); */ -static ossl_inline void gf_cond_swap(gf x, gf_s * __restrict__ y, mask_t swap) +static ossl_inline void gf_cond_swap(gf x, gf_s * RESTRICT y, mask_t swap) { constant_time_cond_swap(x, y, sizeof(gf_s), swap); } diff --git a/crypto/ec/curve448/scalar.c b/crypto/ec/curve448/scalar.c index 3c61ed6f06..a0215df97f 100644 --- a/crypto/ec/curve448/scalar.c +++ b/crypto/ec/curve448/scalar.c @@ -57,15 +57,15 @@ static void sc_subx(curve448_scalar_t out, for (i = 0; i < C448_SCALAR_LIMBS; i++) { chain = (chain + accum[i]) - sub->limb[i]; - out->limb[i] = chain; + out->limb[i] = (c448_word_t)chain; chain >>= WBITS; } - borrow = chain + extra; /* = 0 or -1 */ + borrow = (c448_word_t)chain + extra; /* = 0 or -1 */ chain = 0; for (i = 0; i < C448_SCALAR_LIMBS; i++) { chain = (chain + out->limb[i]) + (p->limb[i] & borrow); - out->limb[i] = chain; + out->limb[i] = (c448_word_t)chain; chain >>= WBITS; } } @@ -84,10 +84,10 @@ static void sc_montmul(curve448_scalar_t out, const curve448_scalar_t a, c448_dword_t chain = 0; for (j = 0; j < C448_SCALAR_LIMBS; j++) { chain += ((c448_dword_t) mand) * mier[j] + accum[j]; - accum[j] = chain; + accum[j] = (c448_word_t)chain; chain >>= WBITS; } - accum[j] = chain; + accum[j] = (c448_word_t)chain; mand = accum[0] * MONTGOMERY_FACTOR; chain = 0; @@ -95,12 +95,12 @@ static void sc_montmul(curve448_scalar_t out, const curve448_scalar_t a, for (j = 0; j < C448_SCALAR_LIMBS; j++) { chain += (c448_dword_t) mand *mier[j] + accum[j]; if (j) - accum[j - 1] = chain; + accum[j - 1] = (c448_word_t)chain; chain >>= WBITS; } chain += accum[j]; chain += hi_carry; - accum[j - 1] = chain; + accum[j - 1] = (c448_word_t)chain; hi_carry = chain >> WBITS; } @@ -128,10 +128,10 @@ void curve448_scalar_add(curve448_scalar_t out, const curve448_scalar_t a, for (i = 0; i < C448_SCALAR_LIMBS; i++) { chain = (chain + a->limb[i]) + b->limb[i]; - out->limb[i] = chain; + out->limb[i] = (c448_word_t)chain; chain >>= WBITS; } - sc_subx(out, out->limb, sc_p, sc_p, chain); + sc_subx(out, out->limb, sc_p, sc_p, (c448_word_t)chain); } static ossl_inline void scalar_decode_short(curve448_scalar_t s, @@ -163,7 +163,7 @@ c448_error_t curve448_scalar_decode( curve448_scalar_mul(s, s, curve448_scalar_one); /* ham-handed reduce */ - return c448_succeed_if(~word_is_zero(accum)); + return c448_succeed_if(~word_is_zero((uint32_t)accum)); } void curve448_scalar_destroy(curve448_scalar_t scalar) @@ -221,15 +221,15 @@ void curve448_scalar_encode(unsigned char ser[C448_SCALAR_BYTES], void curve448_scalar_halve(curve448_scalar_t out, const curve448_scalar_t a) { - c448_word_t mask = -(a->limb[0] & 1); + c448_word_t mask = 0 - (a->limb[0] & 1); c448_dword_t chain = 0; unsigned int i; for (i = 0; i < C448_SCALAR_LIMBS; i++) { chain = (chain + a->limb[i]) + (sc_p->limb[i] & mask); - out->limb[i] = chain; + out->limb[i] = (c448_word_t)chain; chain >>= C448_WORD_BITS; } for (i = 0; i < C448_SCALAR_LIMBS - 1; i++) out->limb[i] = out->limb[i] >> 1 | out->limb[i + 1] << (WBITS - 1); - out->limb[i] = out->limb[i] >> 1 | chain << (WBITS - 1); + out->limb[i] = out->limb[i] >> 1 | (c448_word_t)(chain << (WBITS - 1)); } diff --git a/crypto/ec/curve448/word.h b/crypto/ec/curve448/word.h index 4320909f33..7d47a58c32 100644 --- a/crypto/ec/curve448/word.h +++ b/crypto/ec/curve448/word.h @@ -34,7 +34,8 @@ # if defined(__ARM_NEON__) # include # elif defined(__SSE2__) -# if !defined(__GNUC__) || defined(__clang__) || __GNUC__ >= 5 || (__GNUC__==4 && __GNUC_MINOR__ >= 4) +# if !defined(__GNUC__) || defined(__clang__) || __GNUC__ >= 5 \ + || (__GNUC__==4 && __GNUC_MINOR__ >= 4) # include # else # include @@ -83,7 +84,8 @@ typedef int32_t int32x2_t __attribute__ ((ext_vector_type(2))); typedef uint32_t uint32x8_t __attribute__ ((ext_vector_type(8))); typedef int32_t int32x8_t __attribute__ ((ext_vector_type(8))); typedef word_t vecmask_t __attribute__ ((ext_vector_type(4))); -# else /* GCC, hopefully? */ +# elif defined(__GNUC__) \ + && (__GNUC__ >= 4 || (__GNUC__== 3 && __GNUC_MINOR__ >= 1)) typedef uint64_t uint64x2_t __attribute__ ((vector_size(16))); typedef int64_t int64x2_t __attribute__ ((vector_size(16))); typedef uint64_t uint64x4_t __attribute__ ((vector_size(32))); @@ -131,8 +133,9 @@ static ossl_inline big_register_t br_set_to_mask(mask_t x) { return vdupq_n_u32(x); } -# elif defined(_WIN64) || defined(__amd64__) || defined(__X86_64__) \ - || defined(__aarch64__) +# elif !defined(_MSC_VER) \ + && (defined(_WIN64) || defined(__amd64__) || defined(__X86_64__) \ + || defined(__aarch64__)) # define VECTOR_ALIGNED __attribute__((aligned(8))) typedef uint64_t big_register_t, uint64xn_t; @@ -142,7 +145,15 @@ static ossl_inline big_register_t br_set_to_mask(mask_t x) return (big_register_t) x; } # else -# define VECTOR_ALIGNED __attribute__((aligned(4))) +# ifdef __GNUC__ +# define VECTOR_ALIGNED __attribute__((aligned(4))) +# else +/* + * This shouldn't be a problem because a big_register_t isn't actually a vector + * type anyway in this case. + */ +# define VECTOR_ALIGNED +# endif typedef uint64_t uint64xn_t; typedef uint32_t uint32xn_t; typedef uint32_t big_register_t;