rsa/rsa_ossl.c: implement variant of "Smooth CRT-RSA."
author Andy Polyakov <appro@openssl.org>
Fri, 10 Aug 2018 17:46:03 +0000 (19:46 +0200)
committer Andy Polyakov <appro@openssl.org>
Thu, 23 Aug 2018 20:20:35 +0000 (22:20 +0200)
In the [most common] case of p and q being of the same width, it's possible
to replace the CRT modulo operations with Montgomery reductions, and those
are even fixed-length Montgomery reductions...

Reviewed-by: Paul Dale <paul.dale@oracle.com>
(Merged from https://github.com/openssl/openssl/pull/6915)

crypto/rsa/rsa_ossl.c

index c441905526c82b0e49c1945e9262498bbb0c6688..e1e1c23740fe205b28fef44c391006b0638d65ef 100644 (file)
@@ -133,8 +133,8 @@ static int rsa_ossl_public_encrypt(int flen, const unsigned char *from,
     }
 
     if (rsa->flags & RSA_FLAG_CACHE_PUBLIC)
-        if (!BN_MONT_CTX_set_locked
-            (&rsa->_method_mod_n, rsa->lock, rsa->n, ctx))
+        if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_n, rsa->lock,
+                                    rsa->n, ctx))
             goto err;
 
     if (!rsa->meth->bn_mod_exp(ret, f, rsa->e, rsa->n, ctx,
@@ -319,8 +319,8 @@ static int rsa_ossl_private_encrypt(int flen, const unsigned char *from,
         BN_with_flags(d, rsa->d, BN_FLG_CONSTTIME);
 
         if (rsa->flags & RSA_FLAG_CACHE_PUBLIC)
-            if (!BN_MONT_CTX_set_locked
-                (&rsa->_method_mod_n, rsa->lock, rsa->n, ctx)) {
+            if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_n, rsa->lock,
+                                        rsa->n, ctx)) {
                 BN_free(d);
                 goto err;
             }
@@ -444,8 +444,8 @@ static int rsa_ossl_private_decrypt(int flen, const unsigned char *from,
         BN_with_flags(d, rsa->d, BN_FLG_CONSTTIME);
 
         if (rsa->flags & RSA_FLAG_CACHE_PUBLIC)
-            if (!BN_MONT_CTX_set_locked
-                (&rsa->_method_mod_n, rsa->lock, rsa->n, ctx)) {
+            if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_n, rsa->lock,
+                                        rsa->n, ctx)) {
                 BN_free(d);
                 goto err;
             }
@@ -550,8 +550,8 @@ static int rsa_ossl_public_decrypt(int flen, const unsigned char *from,
     }
 
     if (rsa->flags & RSA_FLAG_CACHE_PUBLIC)
-        if (!BN_MONT_CTX_set_locked
-            (&rsa->_method_mod_n, rsa->lock, rsa->n, ctx))
+        if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_n, rsa->lock,
+                                    rsa->n, ctx))
             goto err;
 
     if (!rsa->meth->bn_mod_exp(ret, f, rsa->e, rsa->n, ctx,
@@ -592,7 +592,7 @@ static int rsa_ossl_public_decrypt(int flen, const unsigned char *from,
 static int rsa_ossl_mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, BN_CTX *ctx)
 {
     BIGNUM *r1, *m1, *vrfy, *r2, *m[RSA_MAX_PRIME_NUM - 2];
-    int ret = 0, i, ex_primes = 0;
+    int ret = 0, i, ex_primes = 0, smooth = 0;
     RSA_PRIME_INFO *pinfo;
 
     BN_CTX_start(ctx);
@@ -609,65 +609,88 @@ static int rsa_ossl_mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, BN_CTX *ctx)
              || ex_primes > RSA_MAX_PRIME_NUM - 2))
         goto err;
 
-    {
-        BIGNUM *p = BN_new(), *q = BN_new();
+    if (rsa->flags & RSA_FLAG_CACHE_PRIVATE) {
+        BIGNUM *factor = BN_new();
+
+        if (factor == NULL)
+            goto err;
 
         /*
          * Make sure BN_mod_inverse in Montgomery initialization uses the
          * BN_FLG_CONSTTIME flag
          */
-        if (p == NULL || q == NULL) {
-            BN_free(p);
-            BN_free(q);
+        if (!(BN_with_flags(factor, rsa->p, BN_FLG_CONSTTIME),
+              BN_MONT_CTX_set_locked(&rsa->_method_mod_p, rsa->lock,
+                                     factor, ctx))
+            || !(BN_with_flags(factor, rsa->q, BN_FLG_CONSTTIME),
+                 BN_MONT_CTX_set_locked(&rsa->_method_mod_q, rsa->lock,
+                                        factor, ctx))) {
+            BN_free(factor);
             goto err;
         }
-        BN_with_flags(p, rsa->p, BN_FLG_CONSTTIME);
-        BN_with_flags(q, rsa->q, BN_FLG_CONSTTIME);
-
-        if (rsa->flags & RSA_FLAG_CACHE_PRIVATE) {
-            if (!BN_MONT_CTX_set_locked
-                (&rsa->_method_mod_p, rsa->lock, p, ctx)
-                || !BN_MONT_CTX_set_locked(&rsa->_method_mod_q,
-                                           rsa->lock, q, ctx)) {
-                BN_free(p);
-                BN_free(q);
+        for (i = 0; i < ex_primes; i++) {
+            pinfo = sk_RSA_PRIME_INFO_value(rsa->prime_infos, i);
+            BN_with_flags(factor, pinfo->r, BN_FLG_CONSTTIME);
+            if (!BN_MONT_CTX_set_locked(&pinfo->m, rsa->lock, factor, ctx)) {
+                BN_free(factor);
                 goto err;
             }
-            if (ex_primes > 0) {
-                /* cache BN_MONT_CTX for other primes */
-                BIGNUM *r = BN_new();
-
-                if (r == NULL) {
-                    BN_free(p);
-                    BN_free(q);
-                    goto err;
-                }
-
-                for (i = 0; i < ex_primes; i++) {
-                    pinfo = sk_RSA_PRIME_INFO_value(rsa->prime_infos, i);
-                    BN_with_flags(r, pinfo->r, BN_FLG_CONSTTIME);
-                    if (!BN_MONT_CTX_set_locked(&pinfo->m, rsa->lock, r, ctx)) {
-                        BN_free(p);
-                        BN_free(q);
-                        BN_free(r);
-                        goto err;
-                    }
-                }
-                BN_free(r);
-            }
         }
         /*
-         * We MUST free p and q before any further use of rsa->p and rsa->q
+         * We MUST free |factor| before any further use of the prime factors
          */
-        BN_free(p);
-        BN_free(q);
+        BN_free(factor);
+
+        smooth = (ex_primes == 0)
+                 && (rsa->meth->bn_mod_exp == BN_mod_exp_mont)
+                 && (BN_num_bits(rsa->q) == BN_num_bits(rsa->p));
     }
 
     if (rsa->flags & RSA_FLAG_CACHE_PUBLIC)
-        if (!BN_MONT_CTX_set_locked
-            (&rsa->_method_mod_n, rsa->lock, rsa->n, ctx))
+        if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_n, rsa->lock,
+                                    rsa->n, ctx))
+            goto err;
+
+    if (smooth) {
+        /*
+         * Conversion from Montgomery domain, a.k.a. Montgomery reduction,
+         * accepts values in the [0, m*2^w) range. w is m's bit width rounded up
+         * to limb width. So that at the very least if |I| is fully reduced,
+         * i.e. less than p*q, we can count on from-to round to perform
+         * below modulo operations on |I|. Unlike BN_mod it's constant time.
+         */
+        if (/* m1 = I mod q */
+            !bn_from_mont_fixed_top(m1, I, rsa->_method_mod_q, ctx)
+            || !bn_to_mont_fixed_top(m1, m1, rsa->_method_mod_q, ctx)
+            /* m1 = m1^dmq1 mod q */
+            || !BN_mod_exp_mont_consttime(m1, m1, rsa->dmq1, rsa->q, ctx,
+                                          rsa->_method_mod_q)
+            /* r1 = I mod p */
+            || !bn_from_mont_fixed_top(r1, I, rsa->_method_mod_p, ctx)
+            || !bn_to_mont_fixed_top(r1, r1, rsa->_method_mod_p, ctx)
+            /* r1 = r1^dmp1 mod p */
+            || !BN_mod_exp_mont_consttime(r1, r1, rsa->dmp1, rsa->p, ctx,
+                                          rsa->_method_mod_p)
+            /* r1 = (r1 - m1) mod p */
+            /*
+             * bn_mod_sub_fixed_top is not regular modular subtraction,
+             * it can tolerate subtrahend to be larger than modulus, but
+             * not bit-wise wider. This makes up for uncommon q>p case,
+             * when |m1| can be larger than |rsa->p|.
+             */
+            || !bn_mod_sub_fixed_top(r1, r1, m1, rsa->p)
+
+            /* r1 = r1 * iqmp mod p */
+            || !bn_to_mont_fixed_top(r1, r1, rsa->_method_mod_p, ctx)
+            || !bn_mul_mont_fixed_top(r1, r1, rsa->iqmp, rsa->_method_mod_p,
+                                      ctx)
+            || !bn_mul_fixed_top(r0, r1, rsa->q, ctx)
+            || !bn_mod_add_fixed_top(r0, r0, m1, rsa->n))
             goto err;
 
+        goto tail;
+    }
+
     /* compute I mod q */
     {
         BIGNUM *c = BN_new();
@@ -690,7 +713,7 @@ static int rsa_ossl_mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, BN_CTX *ctx)
 
             /* compute r1^dmq1 mod q */
             if (!rsa->meth->bn_mod_exp(m1, r1, dmq1, rsa->q, ctx,
-                rsa->_method_mod_q)) {
+                                       rsa->_method_mod_q)) {
                 BN_free(c);
                 BN_free(dmq1);
                 goto err;
@@ -859,10 +882,18 @@ static int rsa_ossl_mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, BN_CTX *ctx)
         BN_free(pr2);
     }
 
+ tail:
     if (rsa->e && rsa->n) {
-        if (!rsa->meth->bn_mod_exp(vrfy, r0, rsa->e, rsa->n, ctx,
-                                   rsa->_method_mod_n))
-            goto err;
+        if (rsa->meth->bn_mod_exp == BN_mod_exp_mont) {
+            if (!BN_mod_exp_mont(vrfy, r0, rsa->e, rsa->n, ctx,
+                                 rsa->_method_mod_n))
+                goto err;
+        } else {
+            bn_correct_top(r0);
+            if (!rsa->meth->bn_mod_exp(vrfy, r0, rsa->e, rsa->n, ctx,
+                                       rsa->_method_mod_n))
+                goto err;
+        }
         /*
          * If 'I' was greater than (or equal to) rsa->n, the operation will
          * be equivalent to using 'I mod n'. However, the result of the
@@ -871,6 +902,11 @@ static int rsa_ossl_mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, BN_CTX *ctx)
          */
         if (!BN_sub(vrfy, vrfy, I))
             goto err;
+        if (BN_is_zero(vrfy)) {
+            bn_correct_top(r0);
+            ret = 1;
+            goto err;   /* not actually error */
+        }
         if (!BN_mod(vrfy, vrfy, rsa->n, ctx))
             goto err;
         if (BN_is_negative(vrfy))
@@ -897,6 +933,15 @@ static int rsa_ossl_mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, BN_CTX *ctx)
             BN_free(d);
         }
     }
+    /*
+     * It's unfortunate that we have to bn_correct_top(r0). What hopefully
+     * saves the day is that correction is highly unlikely, and private key
+     * operations are customarily performed on blinded message. Which means
+     * that attacker won't observe correlation with chosen plaintext.
+     * Secondly, remaining code would still handle it in same computational
+     * time and even conceal memory access pattern around corrected top.
+     */
+    bn_correct_top(r0);
     ret = 1;
  err:
     BN_CTX_end(ctx);