Constify the BIGNUM routines a bit more. The only trouble were the
[openssl.git] / crypto / bn / bn_mul.c
index d0c04e1d4b9b9043e67510424ad958bab08245cb..aed752818295424b3cbeae33af02cb54475737f0 100644 (file)
 #include "cryptlib.h"
 #include "bn_lcl.h"
 
-/* r must be different to a and b */
-/* int BN_mmul(r, a, b) */
-int BN_mul(r, a, b)
-BIGNUM *r;
-BIGNUM *a;
-BIGNUM *b;
+#ifdef BN_RECURSION
+/* Karatsuba recursive multiplication algorithm
+ * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
+
+/* r is 2*n2 words in size,
+ * a and b are both n2 words in size.
+ * n2 must be a power of 2.
+ * We multiply and return the result.
+ * t must be 2*n2 words in size
+ * We calculate
+ * a[0]*b[0]
+ * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
+ * a[1]*b[1]
+ */
+void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
+            BN_ULONG *t)
        {
-       int i;
-       int max,al,bl;
-       BN_ULONG *ap,*bp,*rp;
+       int n=n2/2,c1,c2;
+       unsigned int neg,zero;
+       BN_ULONG ln,lo,*p;
 
-       al=a->top;
-       bl=b->top;
-       if ((al == 0) || (bl == 0))
+# ifdef BN_COUNT
+       printf(" bn_mul_recursive %d * %d\n",n2,n2);
+# endif
+# ifdef BN_MUL_COMBA
+#  if 0
+       if (n2 == 4)
                {
-               r->top=0;
-               return(1);
+               bn_mul_comba4(r,a,b);
+               return;
+               }
+#  endif
+       if (n2 == 8)
+               {
+               bn_mul_comba8(r,a,b);
+               return; 
+               }
+# endif /* BN_MUL_COMBA */
+       if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
+               {
+               /* This should not happen */
+               bn_mul_normal(r,a,n2,b,n2);
+               return;
+               }
+       /* r=(a[0]-a[1])*(b[1]-b[0]) */
+       c1=bn_cmp_words(a,&(a[n]),n);
+       c2=bn_cmp_words(&(b[n]),b,n);
+       zero=neg=0;
+       switch (c1*3+c2)
+               {
+       case -4:
+               bn_sub_words(t,      &(a[n]),a,      n); /* - */
+               bn_sub_words(&(t[n]),b,      &(b[n]),n); /* - */
+               break;
+       case -3:
+               zero=1;
+               break;
+       case -2:
+               bn_sub_words(t,      &(a[n]),a,      n); /* - */
+               bn_sub_words(&(t[n]),&(b[n]),b,      n); /* + */
+               neg=1;
+               break;
+       case -1:
+       case 0:
+       case 1:
+               zero=1;
+               break;
+       case 2:
+               bn_sub_words(t,      a,      &(a[n]),n); /* + */
+               bn_sub_words(&(t[n]),b,      &(b[n]),n); /* - */
+               neg=1;
+               break;
+       case 3:
+               zero=1;
+               break;
+       case 4:
+               bn_sub_words(t,      a,      &(a[n]),n);
+               bn_sub_words(&(t[n]),&(b[n]),b,      n);
+               break;
                }
 
-       max=(al+bl);
-       if (bn_wexpand(r,max) == NULL) return(0);
-       r->top=max;
-       r->neg=a->neg^b->neg;
-       ap=a->d;
-       bp=b->d;
-       rp=r->d;
+# ifdef BN_MUL_COMBA
+       if (n == 4)
+               {
+               if (!zero)
+                       bn_mul_comba4(&(t[n2]),t,&(t[n]));
+               else
+                       memset(&(t[n2]),0,8*sizeof(BN_ULONG));
+               
+               bn_mul_comba4(r,a,b);
+               bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
+               }
+       else if (n == 8)
+               {
+               if (!zero)
+                       bn_mul_comba8(&(t[n2]),t,&(t[n]));
+               else
+                       memset(&(t[n2]),0,16*sizeof(BN_ULONG));
+               
+               bn_mul_comba8(r,a,b);
+               bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
+               }
+       else
+# endif /* BN_MUL_COMBA */
+               {
+               p= &(t[n2*2]);
+               if (!zero)
+                       bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p);
+               else
+                       memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
+               bn_mul_recursive(r,a,b,n,p);
+               bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,p);
+               }
+
+       /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
+        * r[10] holds (a[0]*b[0])
+        * r[32] holds (b[1]*b[1])
+        */
 
-       rp[al]=bn_mul_words(rp,ap,al,*(bp++));
-       rp++;
-       for (i=1; i<bl; i++)
+       c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
+
+       if (neg) /* if t[32] is negative */
                {
-               rp[al]=bn_mul_add_words(rp,ap,al,*(bp++));
-               rp++;
+               c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
+               }
+       else
+               {
+               /* Might have a carry */
+               c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
                }
-       if (r->d[max-1] == 0) r->top--;
-       return(1);
-       }
 
-#if 0
-#include "stack.h"
+       /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
+        * r[10] holds (a[0]*b[0])
+        * r[32] holds (b[1]*b[1])
+        * c1 holds the carry bits
+        */
+       c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
+       if (c1)
+               {
+               p= &(r[n+n2]);
+               lo= *p;
+               ln=(lo+c1)&BN_MASK2;
+               *p=ln;
 
-int limit=16;
+               /* The overflow will stop before we over write
+                * words we should not overwrite */
+               if (ln < (BN_ULONG)c1)
+                       {
+                       do      {
+                               p++;
+                               lo= *p;
+                               ln=(lo+1)&BN_MASK2;
+                               *p=ln;
+                               } while (ln == 0);
+                       }
+               }
+       }
 
-typedef struct bn_pool_st
+/* n+tn is the word length
+ * t needs to be n*4 is size, as does r */
+void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn,
+            int n, BN_ULONG *t)
        {
-       int used;
-       int tos;
-       STACK *sk; 
-       } BN_POOL;
+       int i,j,n2=n*2;
+       unsigned int c1,c2,neg,zero;
+       BN_ULONG ln,lo,*p;
 
-BIGNUM *BN_POOL_push(bp)
-BN_POOL *bp;
-       {
-       BIGNUM *ret;
+# ifdef BN_COUNT
+       printf(" bn_mul_part_recursive %d * %d\n",tn+n,tn+n);
+# endif
+       if (n < 8)
+               {
+               i=tn+n;
+               bn_mul_normal(r,a,i,b,i);
+               return;
+               }
 
-       if (bp->used >= bp->tos)
+       /* r=(a[0]-a[1])*(b[1]-b[0]) */
+       c1=bn_cmp_words(a,&(a[n]),n);
+       c2=bn_cmp_words(&(b[n]),b,n);
+       zero=neg=0;
+       switch (c1*3+c2)
                {
-               ret=BN_new();
-               sk_push(bp->sk,(char *)ret);
-               bp->tos++;
-               bp->used++;
+       case -4:
+               bn_sub_words(t,      &(a[n]),a,      n); /* - */
+               bn_sub_words(&(t[n]),b,      &(b[n]),n); /* - */
+               break;
+       case -3:
+               zero=1;
+               /* break; */
+       case -2:
+               bn_sub_words(t,      &(a[n]),a,      n); /* - */
+               bn_sub_words(&(t[n]),&(b[n]),b,      n); /* + */
+               neg=1;
+               break;
+       case -1:
+       case 0:
+       case 1:
+               zero=1;
+               /* break; */
+       case 2:
+               bn_sub_words(t,      a,      &(a[n]),n); /* + */
+               bn_sub_words(&(t[n]),b,      &(b[n]),n); /* - */
+               neg=1;
+               break;
+       case 3:
+               zero=1;
+               /* break; */
+       case 4:
+               bn_sub_words(t,      a,      &(a[n]),n);
+               bn_sub_words(&(t[n]),&(b[n]),b,      n);
+               break;
+               }
+               /* The zero case isn't yet implemented here. The speedup
+                  would probably be negligible. */
+# if 0
+       if (n == 4)
+               {
+               bn_mul_comba4(&(t[n2]),t,&(t[n]));
+               bn_mul_comba4(r,a,b);
+               bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
+               memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
                }
        else
+# endif
+       if (n == 8)
                {
-               ret=(BIGNUM *)sk_value(bp->sk,bp->used);
-               bp->used++;
+               bn_mul_comba8(&(t[n2]),t,&(t[n]));
+               bn_mul_comba8(r,a,b);
+               bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
+               memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
+               }
+       else
+               {
+               p= &(t[n2*2]);
+               bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p);
+               bn_mul_recursive(r,a,b,n,p);
+               i=n/2;
+               /* If there is only a bottom half to the number,
+                * just do it */
+               j=tn-i;
+               if (j == 0)
+                       {
+                       bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),i,p);
+                       memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
+                       }
+               else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
+                               {
+                               bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
+                                       j,i,p);
+                               memset(&(r[n2+tn*2]),0,
+                                       sizeof(BN_ULONG)*(n2-tn*2));
+                               }
+               else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
+                       {
+                       memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
+                       if (tn < BN_MUL_RECURSIVE_SIZE_NORMAL)
+                               {
+                               bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
+                               }
+                       else
+                               {
+                               for (;;)
+                                       {
+                                       i/=2;
+                                       if (i < tn)
+                                               {
+                                               bn_mul_part_recursive(&(r[n2]),
+                                                       &(a[n]),&(b[n]),
+                                                       tn-i,i,p);
+                                               break;
+                                               }
+                                       else if (i == tn)
+                                               {
+                                               bn_mul_recursive(&(r[n2]),
+                                                       &(a[n]),&(b[n]),
+                                                       i,p);
+                                               break;
+                                               }
+                                       }
+                               }
+                       }
+               }
+
+       /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
+        * r[10] holds (a[0]*b[0])
+        * r[32] holds (b[1]*b[1])
+        */
+
+       c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
+
+       if (neg) /* if t[32] is negative */
+               {
+               c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
+               }
+       else
+               {
+               /* Might have a carry */
+               c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
+               }
+
+       /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
+        * r[10] holds (a[0]*b[0])
+        * r[32] holds (b[1]*b[1])
+        * c1 holds the carry bits
+        */
+       c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
+       if (c1)
+               {
+               p= &(r[n+n2]);
+               lo= *p;
+               ln=(lo+c1)&BN_MASK2;
+               *p=ln;
+
+               /* The overflow will stop before we over write
+                * words we should not overwrite */
+               if (ln < c1)
+                       {
+                       do      {
+                               p++;
+                               lo= *p;
+                               ln=(lo+1)&BN_MASK2;
+                               *p=ln;
+                               } while (ln == 0);
+                       }
                }
-       return(ret);
        }
 
-void BN_POOL_pop(bp,num)
-BN_POOL *bp;
-int num;
+/* a and b must be the same size, which is n2.
+ * r needs to be n2 words and t needs to be n2*2
+ */
+void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
+            BN_ULONG *t)
        {
-       bp->used-=num;
+       int n=n2/2;
+
+# ifdef BN_COUNT
+       printf(" bn_mul_low_recursive %d * %d\n",n2,n2);
+# endif
+
+       bn_mul_recursive(r,a,b,n,&(t[0]));
+       if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
+               {
+               bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
+               bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
+               bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
+               bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
+               }
+       else
+               {
+               bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
+               bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
+               bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
+               bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
+               }
        }
 
-int BN_mul(r,a,b)
-BIGNUM *r,*a,*b;
+/* a and b must be the same size, which is n2.
+ * r needs to be n2 words and t needs to be n2*2
+ * l is the low words of the output.
+ * t needs to be n2*3
+ */
+void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
+            BN_ULONG *t)
        {
-       static BN_POOL bp;
-       static init=1;
+       int i,n;
+       int c1,c2;
+       int neg,oneg,zero;
+       BN_ULONG ll,lc,*lp,*mp;
 
-       if (init)
+# ifdef BN_COUNT
+       printf(" bn_mul_high %d * %d\n",n2,n2);
+# endif
+       n=n2/2;
+
+       /* Calculate (al-ah)*(bh-bl) */
+       neg=zero=0;
+       c1=bn_cmp_words(&(a[0]),&(a[n]),n);
+       c2=bn_cmp_words(&(b[n]),&(b[0]),n);
+       switch (c1*3+c2)
+               {
+       case -4:
+               bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
+               bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
+               break;
+       case -3:
+               zero=1;
+               break;
+       case -2:
+               bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
+               bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
+               neg=1;
+               break;
+       case -1:
+       case 0:
+       case 1:
+               zero=1;
+               break;
+       case 2:
+               bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
+               bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
+               neg=1;
+               break;
+       case 3:
+               zero=1;
+               break;
+       case 4:
+               bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
+               bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
+               break;
+               }
+               
+       oneg=neg;
+       /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
+       /* r[10] = (a[1]*b[1]) */
+# ifdef BN_MUL_COMBA
+       if (n == 8)
+               {
+               bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
+               bn_mul_comba8(r,&(a[n]),&(b[n]));
+               }
+       else
+# endif
                {
-               bp.used=0;
-               bp.tos=0;
-               bp.sk=sk_new_null();
-               init=0;
+               bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,&(t[n2]));
+               bn_mul_recursive(r,&(a[n]),&(b[n]),n,&(t[n2]));
+               }
+
+       /* s0 == low(al*bl)
+        * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
+        * We know s0 and s1 so the only unknown is high(al*bl)
+        * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
+        * high(al*bl) == s1 - (r[0]+l[0]+t[0])
+        */
+       if (l != NULL)
+               {
+               lp= &(t[n2+n]);
+               c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n));
+               }
+       else
+               {
+               c1=0;
+               lp= &(r[0]);
+               }
+
+       if (neg)
+               neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n));
+       else
+               {
+               bn_add_words(&(t[n2]),lp,&(t[0]),n);
+               neg=0;
+               }
+
+       if (l != NULL)
+               {
+               bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
+               }
+       else
+               {
+               lp= &(t[n2+n]);
+               mp= &(t[n2]);
+               for (i=0; i<n; i++)
+                       lp[i]=((~mp[i])+1)&BN_MASK2;
+               }
+
+       /* s[0] = low(al*bl)
+        * t[3] = high(al*bl)
+        * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
+        * r[10] = (a[1]*b[1])
+        */
+       /* R[10] = al*bl
+        * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
+        * R[32] = ah*bh
+        */
+       /* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
+        * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
+        * R[3]=r[1]+(carry/borrow)
+        */
+       if (l != NULL)
+               {
+               lp= &(t[n2]);
+               c1= (int)(bn_add_words(lp,&(t[n2+n]),&(l[0]),n));
+               }
+       else
+               {
+               lp= &(t[n2+n]);
+               c1=0;
+               }
+       c1+=(int)(bn_add_words(&(t[n2]),lp,  &(r[0]),n));
+       if (oneg)
+               c1-=(int)(bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n));
+       else
+               c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n));
+
+       c2 =(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n));
+       c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(r[n]),n));
+       if (oneg)
+               c2-=(int)(bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n));
+       else
+               c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n]),n));
+       
+       if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
+               {
+               i=0;
+               if (c1 > 0)
+                       {
+                       lc=c1;
+                       do      {
+                               ll=(r[i]+lc)&BN_MASK2;
+                               r[i++]=ll;
+                               lc=(lc > ll);
+                               } while (lc);
+                       }
+               else
+                       {
+                       lc= -c1;
+                       do      {
+                               ll=r[i];
+                               r[i++]=(ll-lc)&BN_MASK2;
+                               lc=(lc > ll);
+                               } while (lc);
+                       }
+               }
+       if (c2 != 0) /* Add starting at r[1] */
+               {
+               i=n;
+               if (c2 > 0)
+                       {
+                       lc=c2;
+                       do      {
+                               ll=(r[i]+lc)&BN_MASK2;
+                               r[i++]=ll;
+                               lc=(lc > ll);
+                               } while (lc);
+                       }
+               else
+                       {
+                       lc= -c2;
+                       do      {
+                               ll=r[i];
+                               r[i++]=(ll-lc)&BN_MASK2;
+                               lc=(lc > ll);
+                               } while (lc);
+                       }
                }
-       return(BN_mm(r,a,b,&bp));
        }
+#endif /* BN_RECURSION */
 
-/* r must be different to a and b */
-int BN_mm(m, A, B, bp)
-BIGNUM *m,*A,*B;
-BN_POOL *bp;
+int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
        {
-       int i,num;
-       int an,bn;
-       BIGNUM *a,*b,*c,*d,*ac,*bd;
+       int top,al,bl;
+       BIGNUM *rr;
+       int ret = 0;
+#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
+       int i;
+#endif
+#ifdef BN_RECURSION
+       BIGNUM *t;
+       int j,k;
+#endif
+       BIGNUM *free_a = NULL, *free_b = NULL;
 
-       an=A->top;
-       bn=B->top;
-       if ((an <= limit) || (bn <= limit))
+#ifdef BN_COUNT
+       printf("BN_mul %d * %d\n",a->top,b->top);
+#endif
+
+       bn_check_top(a);
+       bn_check_top(b);
+       bn_check_top(r);
+
+       al=a->top;
+       bl=b->top;
+
+       if ((al == 0) || (bl == 0))
+               {
+               BN_zero(r);
+               return(1);
+               }
+       top=al+bl;
+
+       BN_CTX_start(ctx);
+       if ((r == a) || (r == b))
                {
-               return(BN_mmul(m,A,B));
+               if ((rr = BN_CTX_get(ctx)) == NULL) goto err;
                }
+       else
+               rr = r;
+       rr->neg=a->neg^b->neg;
 
-       a=BN_POOL_push(bp);
-       b=BN_POOL_push(bp);
-       c=BN_POOL_push(bp);
-       d=BN_POOL_push(bp);
-       ac=BN_POOL_push(bp);
-       bd=BN_POOL_push(bp);
+#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
+       i = al-bl;
+#endif
+#ifdef BN_MUL_COMBA
+       if (i == 0)
+               {
+# if 0
+               if (al == 4)
+                       {
+                       if (bn_wexpand(rr,8) == NULL) goto err;
+                       rr->top=8;
+                       bn_mul_comba4(rr->d,a->d,b->d);
+                       goto end;
+                       }
+# endif
+               if (al == 8)
+                       {
+                       if (bn_wexpand(rr,16) == NULL) goto err;
+                       rr->top=16;
+                       bn_mul_comba8(rr->d,a->d,b->d);
+                       goto end;
+                       }
+               }
+#endif /* BN_MUL_COMBA */
+#ifdef BN_RECURSION
+       if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
+               {
+               if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA))
+                       {
+                       BIGNUM *tmp_bn = free_b;
+                       b = free_b = bn_dup_expand(b,al);
+                       free_b->d[bl]=0;
+                       bl++;
+                       i--;
+                       if (tmp_bn) BN_free(tmp_bn);
+                       }
+               else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA))
+                       {
+                       BIGNUM *tmp_bn = free_a;
+                       a = free_a = bn_dup_expand(a,bl);
+                       free_a->d[al]=0;
+                       al++;
+                       i++;
+                       if (tmp_bn) BN_free(tmp_bn);
+                       }
+               if (i == 0)
+                       {
+                       /* symmetric and > 4 */
+                       /* 16 or larger */
+                       j=BN_num_bits_word((BN_ULONG)al);
+                       j=1<<(j-1);
+                       k=j+j;
+                       t = BN_CTX_get(ctx);
+                       if (al == j) /* exact multiple */
+                               {
+                               bn_wexpand(t,k*2);
+                               bn_wexpand(rr,k*2);
+                               bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
+                               }
+                       else
+                               {
+                               BIGNUM *tmp_a = free_a,*tmp_b = free_b;
+                               a = free_a = bn_dup_expand(a,k);
+                               b = free_b = bn_dup_expand(b,k);
+                               if (tmp_a) BN_free(tmp_a);
+                               if (tmp_b) BN_free(tmp_b);
+                               bn_wexpand(t,k*4);
+                               bn_wexpand(rr,k*4);
+                               for (i=free_a->top; i<k; i++)
+                                       free_a->d[i]=0;
+                               for (i=free_b->top; i<k; i++)
+                                       free_b->d[i]=0;
+                               bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
+                               }
+                       rr->top=top;
+                       goto end;
+                       }
+               }
+#endif /* BN_RECURSION */
+       if (bn_wexpand(rr,top) == NULL) goto err;
+       rr->top=top;
+       bn_mul_normal(rr->d,a->d,al,b->d,bl);
 
-       num=(an <= bn)?an:bn;
-       num=1<<(BN_num_bits_word(num-1)-1);
+#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
+end:
+#endif
+       bn_fix_top(rr);
+       if (r != rr) BN_copy(r,rr);
+       ret=1;
+err:
+       if (free_a) BN_free(free_a);
+       if (free_b) BN_free(free_b);
+       BN_CTX_end(ctx);
+       return(ret);
+       }
 
-       /* Are going to now chop things into 'num' word chunks. */
-       num*=BN_BITS2;
+void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
+       {
+       BN_ULONG *rr;
 
-       BN_copy(a,A);
-       BN_mask_bits(a,num);
-       BN_rshift(b,A,num);
+#ifdef BN_COUNT
+       printf(" bn_mul_normal %d * %d\n",na,nb);
+#endif
 
-       BN_copy(c,B);
-       BN_mask_bits(c,num);
-       BN_rshift(d,B,num);
+       if (na < nb)
+               {
+               int itmp;
+               BN_ULONG *ltmp;
 
-       BN_sub(ac ,b,a);
-       BN_sub(bd,c,d);
-       BN_mm(m,ac,bd,bp);
-       BN_mm(ac,a,c,bp);
-       BN_mm(bd,b,d,bp);
+               itmp=na; na=nb; nb=itmp;
+               ltmp=a;   a=b;   b=ltmp;
 
-       BN_add(m,m,ac);
-       BN_add(m,m,bd);
-       BN_lshift(m,m,num);
-       BN_lshift(bd,bd,num*2);
+               }
+       rr= &(r[na]);
+       rr[0]=bn_mul_words(r,a,na,b[0]);
 
-       BN_add(m,m,ac);
-       BN_add(m,m,bd);
-       BN_POOL_pop(bp,6);
-       return(1);
+       for (;;)
+               {
+               if (--nb <= 0) return;
+               rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
+               if (--nb <= 0) return;
+               rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
+               if (--nb <= 0) return;
+               rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
+               if (--nb <= 0) return;
+               rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
+               rr+=4;
+               r+=4;
+               b+=4;
+               }
        }
+
+void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
+       {
+#ifdef BN_COUNT
+       printf(" bn_mul_low_normal %d * %d\n",n,n);
 #endif
+       bn_mul_words(r,a,n,b[0]);
+
+       for (;;)
+               {
+               if (--n <= 0) return;
+               bn_mul_add_words(&(r[1]),a,n,b[1]);
+               if (--n <= 0) return;
+               bn_mul_add_words(&(r[2]),a,n,b[2]);
+               if (--n <= 0) return;
+               bn_mul_add_words(&(r[3]),a,n,b[3]);
+               if (--n <= 0) return;
+               bn_mul_add_words(&(r[4]),a,n,b[4]);
+               r+=4;
+               b+=4;
+               }
+       }