Skip to content

Commit

Permalink
bn_nist: Fix strict-aliasing violations in little-endian optimizations
Browse files Browse the repository at this point in the history
The little-endian optimization is doing some type-punning in a way
violating the C standard aliasing rule by loading or storing through a
lvalue with type "unsigned int" but the memory location has effective
type "unsigned long" or "unsigned long long" (BN_ULONG).  Convert these
accesses to use memcpy instead, as memcpy is defined as-is "accessing
through the lvalues with type char" and char is aliasing with all types.

GCC does a good job to optimize away the temporary copies introduced
with the change.  Ideally copying to a temporary unsigned int array,
doing the calculation, and then copying back to `r_d` will make the code
look better, but unfortunately GCC would fail to optimize away this
temporary array then.

I've not touched the LE optimization in BN_nist_mod_224 because it's
guarded by BN_BITS2!=64, then BN_BITS2 must be 32 and BN_ULONG must be
unsigned int, thus there is no aliasing issue in BN_nist_mod_224.

Fixes #12247.

Reviewed-by: Tom Cosgrove <tom.cosgrove@arm.com>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from #22816)

(cherry picked from commit 990d9ff)
  • Loading branch information
xry111 authored and t8m committed Nov 30, 2023
1 parent 83975c6 commit aa78b15
Showing 1 changed file with 74 additions and 52 deletions.
126 changes: 74 additions & 52 deletions crypto/bn/bn_nist.c
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,28 @@ static void nist_cp_bn(BN_ULONG *dst, const BN_ULONG *src, int top)
# endif
#endif /* BN_BITS2 != 64 */

#ifdef NIST_INT64
/* Helpers to load/store a 32-bit word (uint32_t) from/into a memory
* location and avoid potential aliasing issue. */
static ossl_inline uint32_t load_u32(const void *ptr)
{
uint32_t tmp;

memcpy(&tmp, ptr, sizeof(tmp));
return tmp;
}

static ossl_inline void store_lo32(void *ptr, NIST_INT64 val)
{
/* A cast is needed for big-endian system: on a 32-bit BE system
* NIST_INT64 may be defined as well if the compiler supports 64-bit
* long long. */
uint32_t tmp = (uint32_t)val;

memcpy(ptr, &tmp, sizeof(tmp));
}
#endif /* NIST_INT64 */

#define nist_set_192(to, from, a1, a2, a3) \
{ \
bn_cp_64(to, 0, from, (a3) - 3) \
Expand Down Expand Up @@ -374,42 +396,42 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
unsigned int *rp = (unsigned int *)r_d;
const unsigned int *bp = (const unsigned int *)buf.ui;

acc = rp[0];
acc = load_u32(&rp[0]);
acc += bp[3 * 2 - 6];
acc += bp[5 * 2 - 6];
rp[0] = (unsigned int)acc;
store_lo32(&rp[0], acc);
acc >>= 32;

acc += rp[1];
acc += load_u32(&rp[1]);
acc += bp[3 * 2 - 5];
acc += bp[5 * 2 - 5];
rp[1] = (unsigned int)acc;
store_lo32(&rp[1], acc);
acc >>= 32;

acc += rp[2];
acc += load_u32(&rp[2]);
acc += bp[3 * 2 - 6];
acc += bp[4 * 2 - 6];
acc += bp[5 * 2 - 6];
rp[2] = (unsigned int)acc;
store_lo32(&rp[2], acc);
acc >>= 32;

acc += rp[3];
acc += load_u32(&rp[3]);
acc += bp[3 * 2 - 5];
acc += bp[4 * 2 - 5];
acc += bp[5 * 2 - 5];
rp[3] = (unsigned int)acc;
store_lo32(&rp[3], acc);
acc >>= 32;

acc += rp[4];
acc += load_u32(&rp[4]);
acc += bp[4 * 2 - 6];
acc += bp[5 * 2 - 6];
rp[4] = (unsigned int)acc;
store_lo32(&rp[4], acc);
acc >>= 32;

acc += rp[5];
acc += load_u32(&rp[5]);
acc += bp[4 * 2 - 5];
acc += bp[5 * 2 - 5];
rp[5] = (unsigned int)acc;
store_lo32(&rp[5], acc);

carry = (int)(acc >> 32);
}
Expand Down Expand Up @@ -683,36 +705,36 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
unsigned int *rp = (unsigned int *)r_d;
const unsigned int *bp = (const unsigned int *)buf.ui;

acc = rp[0];
acc = load_u32(&rp[0]);
acc += bp[8 - 8];
acc += bp[9 - 8];
acc -= bp[11 - 8];
acc -= bp[12 - 8];
acc -= bp[13 - 8];
acc -= bp[14 - 8];
rp[0] = (unsigned int)acc;
store_lo32(&rp[0], acc);
acc >>= 32;

acc += rp[1];
acc += load_u32(&rp[1]);
acc += bp[9 - 8];
acc += bp[10 - 8];
acc -= bp[12 - 8];
acc -= bp[13 - 8];
acc -= bp[14 - 8];
acc -= bp[15 - 8];
rp[1] = (unsigned int)acc;
store_lo32(&rp[1], acc);
acc >>= 32;

acc += rp[2];
acc += load_u32(&rp[2]);
acc += bp[10 - 8];
acc += bp[11 - 8];
acc -= bp[13 - 8];
acc -= bp[14 - 8];
acc -= bp[15 - 8];
rp[2] = (unsigned int)acc;
store_lo32(&rp[2], acc);
acc >>= 32;

acc += rp[3];
acc += load_u32(&rp[3]);
acc += bp[11 - 8];
acc += bp[11 - 8];
acc += bp[12 - 8];
Expand All @@ -721,32 +743,32 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
acc -= bp[15 - 8];
acc -= bp[8 - 8];
acc -= bp[9 - 8];
rp[3] = (unsigned int)acc;
store_lo32(&rp[3], acc);
acc >>= 32;

acc += rp[4];
acc += load_u32(&rp[4]);
acc += bp[12 - 8];
acc += bp[12 - 8];
acc += bp[13 - 8];
acc += bp[13 - 8];
acc += bp[14 - 8];
acc -= bp[9 - 8];
acc -= bp[10 - 8];
rp[4] = (unsigned int)acc;
store_lo32(&rp[4], acc);
acc >>= 32;

acc += rp[5];
acc += load_u32(&rp[5]);
acc += bp[13 - 8];
acc += bp[13 - 8];
acc += bp[14 - 8];
acc += bp[14 - 8];
acc += bp[15 - 8];
acc -= bp[10 - 8];
acc -= bp[11 - 8];
rp[5] = (unsigned int)acc;
store_lo32(&rp[5], acc);
acc >>= 32;

acc += rp[6];
acc += load_u32(&rp[6]);
acc += bp[14 - 8];
acc += bp[14 - 8];
acc += bp[15 - 8];
Expand All @@ -755,10 +777,10 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
acc += bp[13 - 8];
acc -= bp[8 - 8];
acc -= bp[9 - 8];
rp[6] = (unsigned int)acc;
store_lo32(&rp[6], acc);
acc >>= 32;

acc += rp[7];
acc += load_u32(&rp[7]);
acc += bp[15 - 8];
acc += bp[15 - 8];
acc += bp[15 - 8];
Expand All @@ -767,7 +789,7 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
acc -= bp[11 - 8];
acc -= bp[12 - 8];
acc -= bp[13 - 8];
rp[7] = (unsigned int)acc;
store_lo32(&rp[7], acc);

carry = (int)(acc >> 32);
}
Expand Down Expand Up @@ -920,43 +942,43 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
unsigned int *rp = (unsigned int *)r_d;
const unsigned int *bp = (const unsigned int *)buf.ui;

acc = rp[0];
acc = load_u32(&rp[0]);
acc += bp[12 - 12];
acc += bp[21 - 12];
acc += bp[20 - 12];
acc -= bp[23 - 12];
rp[0] = (unsigned int)acc;
store_lo32(&rp[0], acc);
acc >>= 32;

acc += rp[1];
acc += load_u32(&rp[1]);
acc += bp[13 - 12];
acc += bp[22 - 12];
acc += bp[23 - 12];
acc -= bp[12 - 12];
acc -= bp[20 - 12];
rp[1] = (unsigned int)acc;
store_lo32(&rp[1], acc);
acc >>= 32;

acc += rp[2];
acc += load_u32(&rp[2]);
acc += bp[14 - 12];
acc += bp[23 - 12];
acc -= bp[13 - 12];
acc -= bp[21 - 12];
rp[2] = (unsigned int)acc;
store_lo32(&rp[2], acc);
acc >>= 32;

acc += rp[3];
acc += load_u32(&rp[3]);
acc += bp[15 - 12];
acc += bp[12 - 12];
acc += bp[20 - 12];
acc += bp[21 - 12];
acc -= bp[14 - 12];
acc -= bp[22 - 12];
acc -= bp[23 - 12];
rp[3] = (unsigned int)acc;
store_lo32(&rp[3], acc);
acc >>= 32;

acc += rp[4];
acc += load_u32(&rp[4]);
acc += bp[21 - 12];
acc += bp[21 - 12];
acc += bp[16 - 12];
Expand All @@ -967,10 +989,10 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
acc -= bp[15 - 12];
acc -= bp[23 - 12];
acc -= bp[23 - 12];
rp[4] = (unsigned int)acc;
store_lo32(&rp[4], acc);
acc >>= 32;

acc += rp[5];
acc += load_u32(&rp[5]);
acc += bp[22 - 12];
acc += bp[22 - 12];
acc += bp[17 - 12];
Expand All @@ -979,59 +1001,59 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
acc += bp[21 - 12];
acc += bp[23 - 12];
acc -= bp[16 - 12];
rp[5] = (unsigned int)acc;
store_lo32(&rp[5], acc);
acc >>= 32;

acc += rp[6];
acc += load_u32(&rp[6]);
acc += bp[23 - 12];
acc += bp[23 - 12];
acc += bp[18 - 12];
acc += bp[15 - 12];
acc += bp[14 - 12];
acc += bp[22 - 12];
acc -= bp[17 - 12];
rp[6] = (unsigned int)acc;
store_lo32(&rp[6], acc);
acc >>= 32;

acc += rp[7];
acc += load_u32(&rp[7]);
acc += bp[19 - 12];
acc += bp[16 - 12];
acc += bp[15 - 12];
acc += bp[23 - 12];
acc -= bp[18 - 12];
rp[7] = (unsigned int)acc;
store_lo32(&rp[7], acc);
acc >>= 32;

acc += rp[8];
acc += load_u32(&rp[8]);
acc += bp[20 - 12];
acc += bp[17 - 12];
acc += bp[16 - 12];
acc -= bp[19 - 12];
rp[8] = (unsigned int)acc;
store_lo32(&rp[8], acc);
acc >>= 32;

acc += rp[9];
acc += load_u32(&rp[9]);
acc += bp[21 - 12];
acc += bp[18 - 12];
acc += bp[17 - 12];
acc -= bp[20 - 12];
rp[9] = (unsigned int)acc;
store_lo32(&rp[9], acc);
acc >>= 32;

acc += rp[10];
acc += load_u32(&rp[10]);
acc += bp[22 - 12];
acc += bp[19 - 12];
acc += bp[18 - 12];
acc -= bp[21 - 12];
rp[10] = (unsigned int)acc;
store_lo32(&rp[10], acc);
acc >>= 32;

acc += rp[11];
acc += load_u32(&rp[11]);
acc += bp[23 - 12];
acc += bp[20 - 12];
acc += bp[19 - 12];
acc -= bp[22 - 12];
rp[11] = (unsigned int)acc;
store_lo32(&rp[11], acc);

carry = (int)(acc >> 32);
}
Expand Down

0 comments on commit aa78b15

Please sign in to comment.