Change bn_mul_mont declaration and BN_MONT_CTX. Update CHANGES.
authorAndy Polyakov <appro@openssl.org>
Sat, 22 Oct 2005 17:57:18 +0000 (17:57 +0000)
committerAndy Polyakov <appro@openssl.org>
Sat, 22 Oct 2005 17:57:18 +0000 (17:57 +0000)
CHANGES
crypto/bn/asm/sparcv9a-mont.pl
crypto/bn/asm/x86-mont.pl
crypto/bn/asm/x86_64-mont.pl
crypto/bn/bn.h
crypto/bn/bn_asm.c
crypto/bn/bn_mont.c

diff --git a/CHANGES b/CHANGES
index 89fdaabc758710b7ca4a24062f1eb46cae7b835c..6c1a127ec9e478d3b0e05054194425f99d6d0a68 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,12 @@
 
  Changes between 0.9.8a and 0.9.9  [xx XXX xxxx]
 
+  *) A new candidate for BIGNUM assembler implementation, bn_mul_mont,
+     a dedicated Montgomery multiplication procedure, is introduced.
+     BN_MONT_CTX is modified to allow bn_mul_mont to reach for higher
+     "64-bit" performance on certain 32-bit targets.
+     [Andy Polyakov]
+
   *) New option SSL_OP_NO_COMP to disable use of compression selectively
      in SSL structures. New SSL ctrl to set maximum send fragment size. 
      Save memory by setting the I/O buffer sizes dynamically instead of
index efdf03b5f01a1ea3d21e8ad584a91a4de3030450..3855295c853facb719344ec226cb01303d33dff1 100755 (executable)
@@ -70,7 +70,7 @@ $rp="%i0";    # BN_ULONG *rp,
 $ap="%i1";     # const BN_ULONG *ap,
 $bp="%i2";     # const BN_ULONG *bp,
 $np="%i3";     # const BN_ULONG *np,
-$n0="%i4";     # BN_ULONG n0,
+$n0="%i4";     # const BN_ULONG *n0,
 $num="%i5";    # int num);
 
 $tp="%l0";
@@ -125,7 +125,7 @@ $fname:
        sethi   %hi(0xffff),$mask
        sll     $num,3,$num             ! num*=8
        or      $mask,%lo(0xffff),$mask
-       mov     %i4,$n0                 ! reassigned, remember?
+       ldx     [%i4],$n0               ! reassigned, remember?
 
        add     %sp,$bias,%o0           ! real top of stack
        sll     $num,2,%o1
index e250e9907ce0d1d87509891a4d0371f91a20ddbb..8d01b7a87f9897f6214b9bd17ed127ab97462576 100755 (executable)
@@ -66,7 +66,7 @@ if($sse2) {
        &mov    ("ebx",&wparam(1));     # const BN_ULONG *ap
        &mov    ("ecx",&wparam(2));     # const BN_ULONG *bp
        &mov    ("edx",&wparam(3));     # const BN_ULONG *np
-       &mov    ("esi",&wparam(4));     # BN_ULONG n0
+       &mov    ("esi",&wparam(4));     # const BN_ULONG *n0
        &mov    ($num,&wparam(5));      # int num
 
        &mov    ("edi","esp");          # saved stack pointer!
@@ -78,6 +78,7 @@ if($sse2) {
        &sub    ($num,1);               # num is restored to its original value
                                        # and will remain constant from now...
 
+       &mov    ("esi",&DWP(0,"esi"));  # pull n0[0]
        &mov    ($_rp,"eax");           # ... save a copy of argument block
        &mov    ($_ap,"ebx");
        &mov    ($_bp,"ecx");
index f3d112f8ff89ca45ba9ee13d0e59c3094deaabaf..e6943b53436f0444194d8386fc122766aa9c39d3 100755 (executable)
@@ -22,7 +22,7 @@ $rp="%rdi";   # BN_ULONG *rp,
 $ap="%rsi";    # const BN_ULONG *ap,
 $bp="%rdx";    # const BN_ULONG *bp,
 $np="%rcx";    # const BN_ULONG *np,
-$n0="%r8";     # BN_ULONG n0,
+$n0="%r8";     # const BN_ULONG *n0,
 $num="%r9";    # int num);
 $lo0="%r10";
 $hi0="%r11";
@@ -55,6 +55,8 @@ bn_mul_mont:
        mov     %rbp,8(%rsp,$num,8)     # tp[num+1]=%rsp
        mov     %rdx,$bp                # $bp reassigned, remember?
 
+       mov     ($n0),$n0               # pull n0[0] value
+
        xor     $i,$i                   # i=0
        xor     $j,$j                   # j=0
 
index b0c8f098080c07fc0ada82a30349bdcc917b9a15..0b616a6142d2ad63fa07ad786e893aa76168ed96 100644 (file)
@@ -295,7 +295,7 @@ struct bn_mont_ctx_st
        BIGNUM N;      /* The modulus */
        BIGNUM Ni;     /* R*(1/R mod N) - N*Ni = 1
                        * (Ni is only stored for bignum algorithm) */
-       BN_ULONG n0;   /* least significant word of Ni */
+       BN_ULONG n0[2];/* least significant word(s) of Ni */
        int flags;
        };
 
@@ -729,7 +729,7 @@ int RAND_pseudo_bytes(unsigned char *buf,int num);
        bn_pollute(a); \
        }
 
-int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num);
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
 BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
 BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
 void     bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num);
index cd50b182b7bf95a16c0029c57db4177686159e13..acb9937504648c401b4403692a01bd75ea0d24c0 100644 (file)
@@ -842,9 +842,9 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
  * versions. Assembler vs. assembler improvement coefficients can
  * [and are known to] differ and are to be documented elsewhere.
  */
-int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num)
        {
-       BN_ULONG c0,c1,ml,*tp;
+       BN_ULONG c0,c1,ml,*tp,n0;
 #ifdef mul64
        BN_ULONG mh;
 #endif
@@ -852,10 +852,12 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U
        int i=0,j;
 
 #if 0  /* template for platform-specific implementation */
-       if (ap==bp)     return bn_sqr_mont(rp,ap,np,n0,num);
+       if (ap==bp)     return bn_sqr_mont(rp,ap,np,n0p,num);
 #endif
        vp = tp = alloca((num+2)*sizeof(BN_ULONG));
 
+       n0 = *n0p;
+
        tp[num]   = bn_mul_words(tp,ap,num,bp[0]);
        tp[num+1] = 0;
        goto enter;
index 3a087fdce9ee8a5ffd16758146adf9ebedbc54b9..f035c189737b3008043507b107f1a52604c2b59b 100644 (file)
@@ -152,7 +152,7 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
 #endif
 
        r->top=max;
-       n0=mont->n0;
+       n0=mont->n0[0];
 
 #ifdef BN_COUNT
        fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl);
@@ -323,16 +323,49 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
                BIGNUM tmod;
                BN_ULONG buf[2];
 
+               tmod.d=buf;
+               tmod.dmax=2;
+               tmod.neg=0;
+
                mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
+
+#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
+               BN_zero(R);
+               if (!(BN_set_bit(R,2*BN_BITS2))) goto err;
+
+                                                               tmod.top=0;
+               if (buf[0] = mod->d[0])                         tmod.top=1;
+               if (buf[1] = mod->top>1 ? mod->d[1] : 0)        tmod.top=2;
+
+               if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
+                       goto err;
+               if (!BN_lshift(Ri,Ri,2*BN_BITS2)) goto err; /* R*Ri */
+               if (!BN_is_zero(Ri))
+                       {
+                       if (!BN_sub_word(Ri,1)) goto err;
+                       }
+               else /* if N mod double word size == 1 */
+                       {
+                       if (bn_expand(Ri,(int)sizeof(BN_ULONG)*2) == NULL)
+                               goto err;
+                       /* Ri-- (mod double word size) */
+                       Ri->neg=0;
+                       Ri->d[0]=BN_MASK2;
+                       Ri->d[1]=BN_MASK2;
+                       Ri->top=2;
+                       }
+               if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
+               /* Ni = (R*Ri-1)/N,
+                * keep only a couple of the least significant words: */
+               mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
+               mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
+#else
                BN_zero(R);
                if (!(BN_set_bit(R,BN_BITS2))) goto err;        /* R */
 
                buf[0]=mod->d[0]; /* tmod = N mod word size */
                buf[1]=0;
-               tmod.d=buf;
                tmod.top = buf[0] != 0 ? 1 : 0;
-               tmod.dmax=2;
-               tmod.neg=0;
                                                        /* Ri = R^-1 mod N*/
                if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
                        goto err;
@@ -348,7 +381,9 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
                if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
                /* Ni = (R*Ri-1)/N,
                 * keep only least significant word: */
-               mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0;
+               mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
+               mont->n0[1] = 0;
+#endif
                }
 #else /* !MONT_WORD */
                { /* bignum version */
@@ -384,7 +419,8 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
        if (!BN_copy(&(to->N),&(from->N))) return NULL;
        if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL;
        to->ri=from->ri;
-       to->n0=from->n0;
+       to->n0[0]=from->n0[0];
+       to->n0[1]=from->n0[1];
        return(to);
        }