From 3a4a88f436ed1dd1165e0b59c1ca4a25e9e1d690 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Fri, 23 Nov 2018 17:23:31 +0100 Subject: [PATCH] bn/bn_{div|shift}.c: introduce fixed-top interfaces. Fixed-top interfaces tolerate zero-padded inputs and facilitate constant-time-ness. bn_div_fixed_top tolerates zero-padded dividend, but not divisor. It's argued that divisor's length is public even when value is secret. [extended tests] Reviewed-by: Paul Dale Reviewed-by: Matt Caswell (Merged from https://github.com/openssl/openssl/pull/7589) --- crypto/bn/bn_div.c | 251 +++++++++++++++---------------- crypto/bn/bn_shift.c | 130 +++++++++++++--- crypto/include/internal/bn_int.h | 9 +- 3 files changed, 233 insertions(+), 157 deletions(-) diff --git a/crypto/bn/bn_div.c b/crypto/bn/bn_div.c index 4c84490248..3a6fa0a1b1 100644 --- a/crypto/bn/bn_div.c +++ b/crypto/bn/bn_div.c @@ -7,6 +7,7 @@ * https://www.openssl.org/source/license.html */ +#include #include #include "internal/cryptlib.h" #include "bn_lcl.h" @@ -137,6 +138,26 @@ static BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0) # endif # endif +static int bn_left_align(BIGNUM *num) +{ + BN_ULONG *d = num->d, n, m, rmask; + int top = num->top; + int rshift = BN_num_bits_word(d[top - 1]), lshift, i; + + lshift = BN_BITS2 - rshift; + rshift %= BN_BITS2; /* say no to undefined behaviour */ + rmask = (BN_ULONG)0 - rshift; /* rmask = 0 - (rshift != 0) */ + rmask |= rmask >> 8; + + for (i = 0, m = 0; i < top; i++) { + n = d[i]; + d[i] = ((n << lshift) | m) & BN_MASK2; + m = (n >> rshift) & rmask; + } + + return lshift; +} + # if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \ && !defined(PEDANTIC) && !defined(BN_DIV3W) # if defined(__GNUC__) && __GNUC__>=2 @@ -188,55 +209,73 @@ static BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0) int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, BN_CTX *ctx) { - int norm_shift, i, j, loop; - BIGNUM *tmp, wnum, *snum, *sdiv, *res; - BN_ULONG *resp, *wnump; - BN_ULONG d0, d1; - int num_n, div_n; - int no_branch = 0; + int ret; + + if (BN_is_zero(divisor)) { + BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO); + return 0; + } /* * Invalid zero-padding would have particularly bad consequences so don't * just rely on bn_check_top() here (bn_check_top() works only for * BN_DEBUG builds) */ - if ((num->top > 0 && num->d[num->top - 1] == 0) || - (divisor->top > 0 && divisor->d[divisor->top - 1] == 0)) { + if (divisor->d[divisor->top - 1] == 0) { BNerr(BN_F_BN_DIV, BN_R_NOT_INITIALIZED); return 0; } - bn_check_top(num); - bn_check_top(divisor); + ret = bn_div_fixed_top(dv, rm, num, divisor, ctx); - if ((BN_get_flags(num, BN_FLG_CONSTTIME) != 0) - || (BN_get_flags(divisor, BN_FLG_CONSTTIME) != 0)) { - no_branch = 1; + if (ret) { + if (dv != NULL) + bn_correct_top(dv); + if (rm != NULL) + bn_correct_top(rm); } - bn_check_top(dv); - bn_check_top(rm); - /*- bn_check_top(num); *//* - * 'num' has been checked already - */ - /*- bn_check_top(divisor); *//* - * 'divisor' has been checked already - */ + return ret; +} - if (BN_is_zero(divisor)) { - BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO); - return 0; - } +/* + * It's argued that *length* of *significant* part of divisor is public. + * Even if it's private modulus that is. Again, *length* is assumed + * public, but not *value*. Former is likely to be pre-defined by + * algorithm with bit granularity, though below subroutine is invariant + * of limb length. Thanks to this assumption we can require that |divisor| + * may not be zero-padded, yet claim this subroutine "constant-time"(*). + * This is because zero-padded dividend, |num|, is tolerated, so that + * caller can pass dividend of public length(*), but with smaller amount + * of significant limbs. This naturally means that quotient, |dv|, would + * contain correspongly less significant limbs as well, and will be zero- + * padded accordingly. Returned remainder, |rm|, will have same bit length + * as divisor, also zero-padded if needed. These actually leave sign bits + * in ambiguous state. In sense that we try to avoid negative zeros, while + * zero-padded zeros would retain sign. + * + * (*) "Constant-time-ness" has two pre-conditions: + * + * - availability of constant-time bn_div_3_words; + * - dividend is at least as "wide" as divisor, limb-wise, zero-padded + * if so requied, which shouldn't be a privacy problem, because + * divisor's length is considered public; + */ +int bn_div_fixed_top(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, + const BIGNUM *divisor, BN_CTX *ctx) +{ + int norm_shift, i, j, loop; + BIGNUM *tmp, *snum, *sdiv, *res; + BN_ULONG *resp, *wnum, *wnumtop; + BN_ULONG d0, d1; + int num_n, div_n; - if (!no_branch && BN_ucmp(num, divisor) < 0) { - if (rm != NULL) { - if (BN_copy(rm, num) == NULL) - return 0; - } - if (dv != NULL) - BN_zero(dv); - return 1; - } + assert(divisor->top > 0 && divisor->d[divisor->top - 1] != 0); + + bn_check_top(num); + bn_check_top(divisor); + bn_check_top(dv); + bn_check_top(rm); BN_CTX_start(ctx); res = (dv == NULL) ? BN_CTX_get(ctx) : dv; @@ -247,112 +286,72 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, goto err; /* First we normalise the numbers */ - norm_shift = BN_BITS2 - ((BN_num_bits(divisor)) % BN_BITS2); - if (!(BN_lshift(sdiv, divisor, norm_shift))) + if (!BN_copy(sdiv, divisor)) goto err; + norm_shift = bn_left_align(sdiv); sdiv->neg = 0; - norm_shift += BN_BITS2; - if (!(BN_lshift(snum, num, norm_shift))) + /* + * Note that bn_lshift_fixed_top's output is always one limb longer + * than input, even when norm_shift is zero. This means that amount of + * inner loop iterations is invariant of dividend value, and that one + * doesn't need to compare dividend and divisor if they were originally + * of the same bit length. + */ + if (!(bn_lshift_fixed_top(snum, num, norm_shift))) goto err; - snum->neg = 0; - - if (no_branch) { - /* - * Since we don't know whether snum is larger than sdiv, we pad snum - * with enough zeroes without changing its value. - */ - if (snum->top <= sdiv->top + 1) { - if (bn_wexpand(snum, sdiv->top + 2) == NULL) - goto err; - for (i = snum->top; i < sdiv->top + 2; i++) - snum->d[i] = 0; - snum->top = sdiv->top + 2; - } else { - if (bn_wexpand(snum, snum->top + 1) == NULL) - goto err; - snum->d[snum->top] = 0; - snum->top++; - } - } div_n = sdiv->top; num_n = snum->top; + + if (num_n <= div_n) { + /* caller didn't pad dividend -> no constant-time guarantee... */ + if (bn_wexpand(snum, div_n + 1) == NULL) + goto err; + memset(&(snum->d[num_n]), 0, (div_n - num_n + 1) * sizeof(BN_ULONG)); + snum->top = num_n = div_n + 1; + } + loop = num_n - div_n; /* * Lets setup a 'window' into snum This is the part that corresponds to * the current 'area' being divided */ - wnum.neg = 0; - wnum.d = &(snum->d[loop]); - wnum.top = div_n; - wnum.flags = BN_FLG_STATIC_DATA; - /* - * only needed when BN_ucmp messes up the values between top and max - */ - wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */ + wnum = &(snum->d[loop]); + wnumtop = &(snum->d[num_n - 1]); /* Get the top 2 words of sdiv */ - /* div_n=sdiv->top; */ d0 = sdiv->d[div_n - 1]; d1 = (div_n == 1) ? 0 : sdiv->d[div_n - 2]; - /* pointer to the 'top' of snum */ - wnump = &(snum->d[num_n - 1]); - - /* Setup to 'res' */ - if (!bn_wexpand(res, (loop + 1))) + /* Setup quotient */ + if (!bn_wexpand(res, loop)) goto err; res->neg = (num->neg ^ divisor->neg); - res->top = loop - no_branch; - resp = &(res->d[loop - 1]); + res->top = loop; + res->flags |= BN_FLG_FIXED_TOP; + resp = &(res->d[loop]); /* space for temp */ if (!bn_wexpand(tmp, (div_n + 1))) goto err; - if (!no_branch) { - if (BN_ucmp(&wnum, sdiv) >= 0) { - /* - * If BN_DEBUG_RAND is defined BN_ucmp changes (via bn_pollute) - * the const bignum arguments => clean the values between top and - * max again - */ - bn_clear_top2max(&wnum); - bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n); - *resp = 1; - } else - res->top--; - } - - /* Increase the resp pointer so that we never create an invalid pointer. */ - resp++; - - /* - * if res->top == 0 then clear the neg value otherwise decrease the resp - * pointer - */ - if (res->top == 0) - res->neg = 0; - else - resp--; - - for (i = 0; i < loop - 1; i++, wnump--) { + for (i = 0; i < loop; i++, wnumtop--) { BN_ULONG q, l0; /* * the first part of the loop uses the top two words of snum and sdiv * to calculate a BN_ULONG q such that | wnum - sdiv * q | < sdiv */ # if defined(BN_DIV3W) - q = bn_div_3_words(wnump, d1, d0); + q = bn_div_3_words(wnumtop, d1, d0); # else BN_ULONG n0, n1, rem = 0; - n0 = wnump[0]; - n1 = wnump[-1]; + n0 = wnumtop[0]; + n1 = wnumtop[-1]; if (n0 == d0) q = BN_MASK2; else { /* n0 < d0 */ - + BN_ULONG n2 = (wnumtop == wnum) ? 0 : wnumtop[-2]; # ifdef BN_LLONG BN_ULLONG t2; @@ -372,7 +371,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, t2 = (BN_ULLONG) d1 *q; for (;;) { - if (t2 <= ((((BN_ULLONG) rem) << BN_BITS2) | wnump[-2])) + if (t2 <= ((((BN_ULLONG) rem) << BN_BITS2) | n2)) break; q--; rem += d0; @@ -405,7 +404,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, # endif for (;;) { - if ((t2h < rem) || ((t2h == rem) && (t2l <= wnump[-2]))) + if ((t2h < rem) || ((t2h == rem) && (t2l <= n2))) break; q--; rem += d0; @@ -421,12 +420,12 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, l0 = bn_mul_words(tmp->d, sdiv->d, div_n, q); tmp->d[div_n] = l0; - wnum.d--; + wnum--; /* - * ingore top values of the bignums just sub the two BN_ULONG arrays + * ignore top values of the bignums just sub the two BN_ULONG arrays * with bn_sub_words */ - l0 = bn_sub_words(wnum.d, wnum.d, tmp->d, div_n + 1); + l0 = bn_sub_words(wnum, wnum, tmp->d, div_n + 1); q -= l0; /* * Note: As we have considered only the leading two BN_ULONGs in @@ -435,31 +434,19 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, */ for (l0 = 0 - l0, j = 0; j < div_n; j++) tmp->d[j] = sdiv->d[j] & l0; - l0 = bn_add_words(wnum.d, wnum.d, tmp->d, div_n); - /* - * we can't have an overflow here (assuming that q != 0, but - * if q == 0 then tmp is zero anyway) - */ - (*wnump) += l0; + l0 = bn_add_words(wnum, wnum, tmp->d, div_n); + (*wnumtop) += l0; + assert((*wnumtop) == 0); /* store part of the result */ - resp--; - *resp = q; - } - bn_correct_top(snum); - if (rm != NULL) { - /* - * Keep a copy of the neg flag in num because if rm==num BN_rshift() - * will overwrite it. - */ - int neg = num->neg; - BN_rshift(rm, snum, norm_shift); - if (!BN_is_zero(rm)) - rm->neg = neg; - bn_check_top(rm); + *--resp = q; } - if (no_branch) - bn_correct_top(res); + /* snum holds remainder, it's as wide as divisor */ + snum->neg = num->neg; + snum->top = div_n; + snum->flags |= BN_FLG_FIXED_TOP; + if (rm != NULL) + bn_rshift_fixed_top(rm, snum, norm_shift); BN_CTX_end(ctx); return 1; err: diff --git a/crypto/bn/bn_shift.c b/crypto/bn/bn_shift.c index 15d4b321ba..b7a1e0ff9a 100644 --- a/crypto/bn/bn_shift.c +++ b/crypto/bn/bn_shift.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -7,6 +7,7 @@ * https://www.openssl.org/source/license.html */ +#include #include "internal/cryptlib.h" #include "bn_lcl.h" @@ -82,40 +83,70 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a) int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) { - int i, nw, lb, rb; - BN_ULONG *t, *f; - BN_ULONG l; - - bn_check_top(r); - bn_check_top(a); + int ret; if (n < 0) { BNerr(BN_F_BN_LSHIFT, BN_R_INVALID_SHIFT); return 0; } + ret = bn_lshift_fixed_top(r, a, n); + + bn_correct_top(r); + bn_check_top(r); + + return ret; +} + +/* + * In respect to shift factor the execution time is invariant of + * |n % BN_BITS2|, but not |n / BN_BITS2|. Or in other words pre-condition + * for constant-time-ness is |n < BN_BITS2| or |n / BN_BITS2| being + * non-secret. + */ +int bn_lshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n) +{ + int i, nw; + unsigned int lb, rb; + BN_ULONG *t, *f; + BN_ULONG l, m, rmask = 0; + + assert(n >= 0); + + bn_check_top(r); + bn_check_top(a); + nw = n / BN_BITS2; if (bn_wexpand(r, a->top + nw + 1) == NULL) return 0; - r->neg = a->neg; - lb = n % BN_BITS2; - rb = BN_BITS2 - lb; - f = a->d; - t = r->d; - t[a->top + nw] = 0; - if (lb == 0) - for (i = a->top - 1; i >= 0; i--) - t[nw + i] = f[i]; - else - for (i = a->top - 1; i >= 0; i--) { - l = f[i]; - t[nw + i + 1] |= (l >> rb) & BN_MASK2; - t[nw + i] = (l << lb) & BN_MASK2; + + if (a->top != 0) { + lb = (unsigned int)n % BN_BITS2; + rb = BN_BITS2 - lb; + rb %= BN_BITS2; /* say no to undefined behaviour */ + rmask = (BN_ULONG)0 - rb; /* rmask = 0 - (rb != 0) */ + rmask |= rmask >> 8; + f = &(a->d[0]); + t = &(r->d[nw]); + l = f[a->top - 1]; + t[a->top] = (l >> rb) & rmask; + for (i = a->top - 1; i > 0; i--) { + m = l << lb; + l = f[i - 1]; + t[i] = (m | ((l >> rb) & rmask)) & BN_MASK2; } - memset(t, 0, sizeof(*t) * nw); + t[0] = (l << lb) & BN_MASK2; + } else { + /* shouldn't happen, but formally required */ + r->d[nw] = 0; + } + if (nw != 0) + memset(r->d, 0, sizeof(*t) * nw); + + r->neg = a->neg; r->top = a->top + nw + 1; - bn_correct_top(r); - bn_check_top(r); + r->flags |= BN_FLG_FIXED_TOP; + return 1; } @@ -173,3 +204,54 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) bn_check_top(r); return 1; } + +/* + * In respect to shift factor the execution time is invariant of + * |n % BN_BITS2|, but not |n / BN_BITS2|. Or in other words pre-condition + * for constant-time-ness for sufficiently[!] zero-padded inputs is + * |n < BN_BITS2| or |n / BN_BITS2| being non-secret. + */ +int bn_rshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n) +{ + int i, top, nw; + unsigned int lb, rb; + BN_ULONG *t, *f; + BN_ULONG l, m, mask; + + bn_check_top(r); + bn_check_top(a); + + assert(n >= 0); + + nw = n / BN_BITS2; + if (nw >= a->top) { + /* shouldn't happen, but formally required */ + BN_zero(r); + return 1; + } + + rb = (unsigned int)n % BN_BITS2; + lb = BN_BITS2 - rb; + lb %= BN_BITS2; /* say no to undefined behaviour */ + mask = (BN_ULONG)0 - lb; /* mask = 0 - (lb != 0) */ + mask |= mask >> 8; + top = a->top - nw; + if (r != a && bn_wexpand(r, top) == NULL) + return 0; + + t = &(r->d[0]); + f = &(a->d[nw]); + l = f[0]; + for (i = 0; i < top - 1; i++) { + m = f[i + 1]; + t[i] = (l >> rb) | ((m << lb) & mask); + l = m; + } + t[i] = l >> rb; + + r->neg = a->neg; + r->top = top; + r->flags |= BN_FLG_FIXED_TOP; + + return 1; +} diff --git a/crypto/include/internal/bn_int.h b/crypto/include/internal/bn_int.h index cffe5cfc16..30be7efe14 100644 --- a/crypto/include/internal/bn_int.h +++ b/crypto/include/internal/bn_int.h @@ -65,7 +65,10 @@ int bn_set_words(BIGNUM *a, const BN_ULONG *words, int num_words); * is customarily arranged by bn_correct_top. Output from below functions * is not processed with bn_correct_top, and for this reason it may not be * returned out of public API. It may only be passed internally into other - * functions known to support non-minimal or zero-padded BIGNUMs. + * functions known to support non-minimal or zero-padded BIGNUMs. Even + * though the goal is to facilitate constant-time-ness, not each subroutine + * is constant-time by itself. They all have pre-conditions, consult source + * code... */ int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_MONT_CTX *mont, BN_CTX *ctx); @@ -79,5 +82,9 @@ int bn_mod_sub_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m); int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx); +int bn_lshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n); +int bn_rshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n); +int bn_div_fixed_top(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, + const BIGNUM *d, BN_CTX *ctx); #endif -- 2.34.1