size_tification.
[openssl.git] / crypto / bn / bn_mont.c
index b532305b80b08bc508342a167dac9cec92085cf0..7224637ab31edb07834a809615180df5d7cec479 100644 (file)
@@ -240,20 +240,69 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
                        }
                }
        bn_correct_top(r);
-       
-       /* mont->ri will be a multiple of the word size */
-#if 0
-       BN_rshift(ret,r,mont->ri);
-#else
-       if (r->top < ri)
+
+       /* mont->ri will be a multiple of the word size, and the code below
+        * is roughly equivalent to BN_rshift(ret,r,mont->ri) */
+       if (r->top <= ri)
                {
                ret->top=0;
                return(1);
                }
        al=r->top-ri;
-       if (bn_wexpand(ret,al) == NULL) return(0);
+
+#define BRANCH_FREE 1
+#if BRANCH_FREE
+       if (bn_wexpand(ret,ri) == NULL) return(0);
+       x=0-(((al-ri)>>(sizeof(al)*8-1))&1);
+       ret->top=x=(ri&~x)|(al&x);      /* min(ri,al) */
        ret->neg=r->neg;
+
+       rp=ret->d;
+       ap=&(r->d[ri]);
+
+       {
+       size_t m1,m2;
+
+       v=bn_sub_words(rp,ap,np,ri);
+       /* this ----------------^^ works even in al<ri case
+        * thanks to zealous zeroing of top of the vector in the
+        * beginning. */
+
+       /* if ((al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */
+       /* in other words if subtraction result is real, then
+        * trick unconditional memcpy below to perform in-place
+        * "refresh" instead of actual copy. */
+       m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1);   /* al<ri */
+       m2=0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1);   /* al>ri */
+       m1|=m2;                 /* (al!=ri) */
+       m1|=(0-(size_t)v);      /* (al!=ri || v) */
+       m1&=~m2;                /* (al!=ri || v) && !al>ri */
+       nrp=(BN_ULONG *)(((size_t)rp&~m1)|((size_t)ap&m1));
+       }
+
+       /* 'i<ri' is chosen to eliminate dependency on input data, even
+        * though it results in redundant copy in al<ri case. */
+       for (i=0,ri-=4; i<ri; i+=4)
+               {
+               BN_ULONG t1,t2,t3,t4;
+               
+               t1=nrp[i+0];
+               t2=nrp[i+1];
+               t3=nrp[i+2];    ap[i+0]=0;
+               t4=nrp[i+3];    ap[i+1]=0;
+               rp[i+0]=t1;     ap[i+2]=0;
+               rp[i+1]=t2;     ap[i+3]=0;
+               rp[i+2]=t3;
+               rp[i+3]=t4;
+               }
+       for (ri+=4; i<ri; i++)
+               rp[i]=nrp[i], ap[i]=0;
+       bn_correct_top(r);
+       bn_correct_top(ret);
+#else
+       if (bn_wexpand(ret,al) == NULL) return(0);
        ret->top=al;
+       ret->neg=r->neg;
 
        rp=ret->d;
        ap=&(r->d[ri]);
@@ -274,12 +323,12 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
        al+=4;
        for (; i<al; i++)
                rp[i]=ap[i];
-#endif
 
        if (BN_ucmp(ret, &(mont->N)) >= 0)
                {
                if (!BN_usub(ret,ret,&(mont->N))) return(0);
                }
+#endif
        bn_check_top(ret);
 
        return(1);
@@ -345,6 +394,7 @@ void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
        BN_init(&(ctx->RR));
        BN_init(&(ctx->N));
        BN_init(&(ctx->Ni));
+       ctx->n0[0] = ctx->n0[1] = 0;
        ctx->flags=0;
        }
 
@@ -376,6 +426,7 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
                BIGNUM tmod;
                BN_ULONG buf[2];
 
+               BN_init(&tmod);
                tmod.d=buf;
                tmod.dmax=2;
                tmod.neg=0;
@@ -383,6 +434,11 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
                mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
 
 #if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
+               /* Only certain BN_BITS2<=32 platforms actually make use of
+                * n0[1], and we could use the #else case (with a shorter R
+                * value) for the others.  However, currently only the assembler
+                * files know which is which. */
+
                BN_zero(R);
                if (!(BN_set_bit(R,2*BN_BITS2))) goto err;
 
@@ -481,6 +537,7 @@ BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,
                                        const BIGNUM *mod, BN_CTX *ctx)
        {
        int got_write_lock = 0;
+       BN_MONT_CTX *ret;
 
        CRYPTO_r_lock(lock);
        if (!*pmont)
@@ -491,19 +548,20 @@ BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,
 
                if (!*pmont)
                        {
-                       BN_MONT_CTX *mtmp;
-                       mtmp = BN_MONT_CTX_new();
-                       if (mtmp && !BN_MONT_CTX_set(mtmp, mod, ctx))
-                               BN_MONT_CTX_free(mtmp);
+                       ret = BN_MONT_CTX_new();
+                       if (ret && !BN_MONT_CTX_set(ret, mod, ctx))
+                               BN_MONT_CTX_free(ret);
                        else
-                               *pmont = mtmp;
+                               *pmont = ret;
                        }
                }
        
+       ret = *pmont;
+       
        if (got_write_lock)
                CRYPTO_w_unlock(lock);
        else
                CRYPTO_r_unlock(lock);
                
-       return *pmont;
+       return ret;
        }