PR: 2111
[openssl.git] / crypto / bn / bn_mul.c
index 36e9c4d9ffeae93f7bf0adedff317504eca4913b..a0e9ec3b4694cb896a565f2953c56f53eef1da1c 100644 (file)
  * [including the GNU Public Licence.]
  */
 
+#ifndef BN_DEBUG
+# undef NDEBUG /* avoid conflicting definitions */
+# define NDEBUG
+#endif
+
 #include <stdio.h>
+#include <assert.h>
 #include "cryptlib.h"
 #include "bn_lcl.h"
 
+#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
+/* Here follows specialised variants of bn_add_words() and
+   bn_sub_words().  They have the property performing operations on
+   arrays of different sizes.  The sizes of those arrays is expressed through
+   cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
+   which is the delta between the two lengths, calculated as len(a)-len(b).
+   All lengths are the number of BN_ULONGs...  For the operations that require
+   a result array as parameter, it must have the length cl+abs(dl).
+   These functions should probably end up in bn_asm.c as soon as there are
+   assembler counterparts for the systems that use assembler files.  */
+
+BN_ULONG bn_sub_part_words(BN_ULONG *r,
+       const BN_ULONG *a, const BN_ULONG *b,
+       int cl, int dl)
+       {
+       BN_ULONG c, t;
+
+       assert(cl >= 0);
+       c = bn_sub_words(r, a, b, cl);
+
+       if (dl == 0)
+               return c;
+
+       r += cl;
+       a += cl;
+       b += cl;
+
+       if (dl < 0)
+               {
+#ifdef BN_COUNT
+               fprintf(stderr, "  bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
+#endif
+               for (;;)
+                       {
+                       t = b[0];
+                       r[0] = (0-t-c)&BN_MASK2;
+                       if (t != 0) c=1;
+                       if (++dl >= 0) break;
+
+                       t = b[1];
+                       r[1] = (0-t-c)&BN_MASK2;
+                       if (t != 0) c=1;
+                       if (++dl >= 0) break;
+
+                       t = b[2];
+                       r[2] = (0-t-c)&BN_MASK2;
+                       if (t != 0) c=1;
+                       if (++dl >= 0) break;
+
+                       t = b[3];
+                       r[3] = (0-t-c)&BN_MASK2;
+                       if (t != 0) c=1;
+                       if (++dl >= 0) break;
+
+                       b += 4;
+                       r += 4;
+                       }
+               }
+       else
+               {
+               int save_dl = dl;
+#ifdef BN_COUNT
+               fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl, dl, c);
+#endif
+               while(c)
+                       {
+                       t = a[0];
+                       r[0] = (t-c)&BN_MASK2;
+                       if (t != 0) c=0;
+                       if (--dl <= 0) break;
+
+                       t = a[1];
+                       r[1] = (t-c)&BN_MASK2;
+                       if (t != 0) c=0;
+                       if (--dl <= 0) break;
+
+                       t = a[2];
+                       r[2] = (t-c)&BN_MASK2;
+                       if (t != 0) c=0;
+                       if (--dl <= 0) break;
+
+                       t = a[3];
+                       r[3] = (t-c)&BN_MASK2;
+                       if (t != 0) c=0;
+                       if (--dl <= 0) break;
+
+                       save_dl = dl;
+                       a += 4;
+                       r += 4;
+                       }
+               if (dl > 0)
+                       {
+#ifdef BN_COUNT
+                       fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
+#endif
+                       if (save_dl > dl)
+                               {
+                               switch (save_dl - dl)
+                                       {
+                               case 1:
+                                       r[1] = a[1];
+                                       if (--dl <= 0) break;
+                               case 2:
+                                       r[2] = a[2];
+                                       if (--dl <= 0) break;
+                               case 3:
+                                       r[3] = a[3];
+                                       if (--dl <= 0) break;
+                                       }
+                               a += 4;
+                               r += 4;
+                               }
+                       }
+               if (dl > 0)
+                       {
+#ifdef BN_COUNT
+                       fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, copy)\n", cl, dl);
+#endif
+                       for(;;)
+                               {
+                               r[0] = a[0];
+                               if (--dl <= 0) break;
+                               r[1] = a[1];
+                               if (--dl <= 0) break;
+                               r[2] = a[2];
+                               if (--dl <= 0) break;
+                               r[3] = a[3];
+                               if (--dl <= 0) break;
+
+                               a += 4;
+                               r += 4;
+                               }
+                       }
+               }
+       return c;
+       }
+#endif
+
+BN_ULONG bn_add_part_words(BN_ULONG *r,
+       const BN_ULONG *a, const BN_ULONG *b,
+       int cl, int dl)
+       {
+       BN_ULONG c, l, t;
+
+       assert(cl >= 0);
+       c = bn_add_words(r, a, b, cl);
+
+       if (dl == 0)
+               return c;
+
+       r += cl;
+       a += cl;
+       b += cl;
+
+       if (dl < 0)
+               {
+               int save_dl = dl;
+#ifdef BN_COUNT
+               fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
+#endif
+               while (c)
+                       {
+                       l=(c+b[0])&BN_MASK2;
+                       c=(l < c);
+                       r[0]=l;
+                       if (++dl >= 0) break;
+
+                       l=(c+b[1])&BN_MASK2;
+                       c=(l < c);
+                       r[1]=l;
+                       if (++dl >= 0) break;
+
+                       l=(c+b[2])&BN_MASK2;
+                       c=(l < c);
+                       r[2]=l;
+                       if (++dl >= 0) break;
+
+                       l=(c+b[3])&BN_MASK2;
+                       c=(l < c);
+                       r[3]=l;
+                       if (++dl >= 0) break;
+
+                       save_dl = dl;
+                       b+=4;
+                       r+=4;
+                       }
+               if (dl < 0)
+                       {
+#ifdef BN_COUNT
+                       fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c == 0)\n", cl, dl);
+#endif
+                       if (save_dl < dl)
+                               {
+                               switch (dl - save_dl)
+                                       {
+                               case 1:
+                                       r[1] = b[1];
+                                       if (++dl >= 0) break;
+                               case 2:
+                                       r[2] = b[2];
+                                       if (++dl >= 0) break;
+                               case 3:
+                                       r[3] = b[3];
+                                       if (++dl >= 0) break;
+                                       }
+                               b += 4;
+                               r += 4;
+                               }
+                       }
+               if (dl < 0)
+                       {
+#ifdef BN_COUNT
+                       fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, copy)\n", cl, dl);
+#endif
+                       for(;;)
+                               {
+                               r[0] = b[0];
+                               if (++dl >= 0) break;
+                               r[1] = b[1];
+                               if (++dl >= 0) break;
+                               r[2] = b[2];
+                               if (++dl >= 0) break;
+                               r[3] = b[3];
+                               if (++dl >= 0) break;
+
+                               b += 4;
+                               r += 4;
+                               }
+                       }
+               }
+       else
+               {
+               int save_dl = dl;
+#ifdef BN_COUNT
+               fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
+#endif
+               while (c)
+                       {
+                       t=(a[0]+c)&BN_MASK2;
+                       c=(t < c);
+                       r[0]=t;
+                       if (--dl <= 0) break;
+
+                       t=(a[1]+c)&BN_MASK2;
+                       c=(t < c);
+                       r[1]=t;
+                       if (--dl <= 0) break;
+
+                       t=(a[2]+c)&BN_MASK2;
+                       c=(t < c);
+                       r[2]=t;
+                       if (--dl <= 0) break;
+
+                       t=(a[3]+c)&BN_MASK2;
+                       c=(t < c);
+                       r[3]=t;
+                       if (--dl <= 0) break;
+
+                       save_dl = dl;
+                       a+=4;
+                       r+=4;
+                       }
+#ifdef BN_COUNT
+               fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
+#endif
+               if (dl > 0)
+                       {
+                       if (save_dl > dl)
+                               {
+                               switch (save_dl - dl)
+                                       {
+                               case 1:
+                                       r[1] = a[1];
+                                       if (--dl <= 0) break;
+                               case 2:
+                                       r[2] = a[2];
+                                       if (--dl <= 0) break;
+                               case 3:
+                                       r[3] = a[3];
+                                       if (--dl <= 0) break;
+                                       }
+                               a += 4;
+                               r += 4;
+                               }
+                       }
+               if (dl > 0)
+                       {
+#ifdef BN_COUNT
+                       fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, copy)\n", cl, dl);
+#endif
+                       for(;;)
+                               {
+                               r[0] = a[0];
+                               if (--dl <= 0) break;
+                               r[1] = a[1];
+                               if (--dl <= 0) break;
+                               r[2] = a[2];
+                               if (--dl <= 0) break;
+                               r[3] = a[3];
+                               if (--dl <= 0) break;
+
+                               a += 4;
+                               r += 4;
+                               }
+                       }
+               }
+       return c;
+       }
+
 #ifdef BN_RECURSION
+/* Karatsuba recursive multiplication algorithm
+ * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
+
 /* r is 2*n2 words in size,
  * a and b are both n2 words in size.
  * n2 must be a power of 2.
  * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
  * a[1]*b[1]
  */
+/* dnX may not be positive, but n2/2+dnX has to be */
 void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
-            BN_ULONG *t)
+       int dna, int dnb, BN_ULONG *t)
        {
        int n=n2/2,c1,c2;
+       int tna=n+dna, tnb=n+dnb;
        unsigned int neg,zero;
        BN_ULONG ln,lo,*p;
 
-#ifdef BN_COUNT
-printf(" bn_mul_recursive %d * %d\n",n2,n2);
-#endif
-#ifdef BN_MUL_COMBA
-/*     if (n2 == 4)
+# ifdef BN_COUNT
+       fprintf(stderr," bn_mul_recursive %d%+d * %d%+d\n",n2,dna,n2,dnb);
+# endif
+# ifdef BN_MUL_COMBA
+#  if 0
+       if (n2 == 4)
                {
                bn_mul_comba4(r,a,b);
                return;
                }
-       else */ if (n2 == 8)
+#  endif
+       /* Only call bn_mul_comba 8 if n2 == 8 and the
+        * two arrays are complete [steve]
+        */
+       if (n2 == 8 && dna == 0 && dnb == 0)
                {
                bn_mul_comba8(r,a,b);
                return; 
                }
-#endif
+# endif /* BN_MUL_COMBA */
+       /* Else do normal multiply */
        if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
                {
-               /* This should not happen */
-               bn_mul_normal(r,a,n2,b,n2);
+               bn_mul_normal(r,a,n2+dna,b,n2+dnb);
+               if ((dna + dnb) < 0)
+                       memset(&r[2*n2 + dna + dnb], 0,
+                               sizeof(BN_ULONG) * -(dna + dnb));
                return;
                }
        /* r=(a[0]-a[1])*(b[1]-b[0]) */
-       c1=bn_cmp_words(a,&(a[n]),n);
-       c2=bn_cmp_words(&(b[n]),b,n);
+       c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
+       c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
        zero=neg=0;
        switch (c1*3+c2)
                {
        case -4:
-               bn_sub_words(t,      &(a[n]),a,      n); /* - */
-               bn_sub_words(&(t[n]),b,      &(b[n]),n); /* - */
+               bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
+               bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
                break;
        case -3:
                zero=1;
                break;
        case -2:
-               bn_sub_words(t,      &(a[n]),a,      n); /* - */
-               bn_sub_words(&(t[n]),&(b[n]),b,      n); /* + */
+               bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
+               bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
                neg=1;
                break;
        case -1:
@@ -123,21 +451,22 @@ printf(" bn_mul_recursive %d * %d\n",n2,n2);
                zero=1;
                break;
        case 2:
-               bn_sub_words(t,      a,      &(a[n]),n); /* + */
-               bn_sub_words(&(t[n]),b,      &(b[n]),n); /* - */
+               bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
+               bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
                neg=1;
                break;
        case 3:
                zero=1;
                break;
        case 4:
-               bn_sub_words(t,      a,      &(a[n]),n);
-               bn_sub_words(&(t[n]),&(b[n]),b,      n);
+               bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
+               bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
                break;
                }
 
-#ifdef BN_MUL_COMBA
-       if (n == 4)
+# ifdef BN_MUL_COMBA
+       if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
+                                              extra args to do this well */
                {
                if (!zero)
                        bn_mul_comba4(&(t[n2]),t,&(t[n]));
@@ -147,7 +476,9 @@ printf(" bn_mul_recursive %d * %d\n",n2,n2);
                bn_mul_comba4(r,a,b);
                bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
                }
-       else if (n == 8)
+       else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
+                                                   take extra args to do this
+                                                   well */
                {
                if (!zero)
                        bn_mul_comba8(&(t[n2]),t,&(t[n]));
@@ -158,15 +489,15 @@ printf(" bn_mul_recursive %d * %d\n",n2,n2);
                bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
                }
        else
-#endif
+# endif /* BN_MUL_COMBA */
                {
                p= &(t[n2*2]);
                if (!zero)
-                       bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p);
+                       bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
                else
                        memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
-               bn_mul_recursive(r,a,b,n,p);
-               bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,p);
+               bn_mul_recursive(r,a,b,n,0,0,p);
+               bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p);
                }
 
        /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
@@ -215,86 +546,132 @@ printf(" bn_mul_recursive %d * %d\n",n2,n2);
 
 /* n+tn is the word length
  * t needs to be n*4 is size, as does r */
-void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn,
-            int n, BN_ULONG *t)
+/* tnX may not be negative but less than n */
+void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
+            int tna, int tnb, BN_ULONG *t)
        {
        int i,j,n2=n*2;
-       unsigned int c1;
+       int c1,c2,neg,zero;
        BN_ULONG ln,lo,*p;
 
-#ifdef BN_COUNT
-printf(" bn_mul_part_recursive %d * %d\n",tn+n,tn+n);
-#endif
+# ifdef BN_COUNT
+       fprintf(stderr," bn_mul_part_recursive (%d%+d) * (%d%+d)\n",
+               n, tna, n, tnb);
+# endif
        if (n < 8)
                {
-               i=tn+n;
-               bn_mul_normal(r,a,i,b,i);
+               bn_mul_normal(r,a,n+tna,b,n+tnb);
                return;
                }
 
        /* r=(a[0]-a[1])*(b[1]-b[0]) */
-       bn_sub_words(t,      a,      &(a[n]),n); /* + */
-       bn_sub_words(&(t[n]),b,      &(b[n]),n); /* - */
-
-/*     if (n == 4)
+       c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
+       c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
+       zero=neg=0;
+       switch (c1*3+c2)
+               {
+       case -4:
+               bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
+               bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
+               break;
+       case -3:
+               zero=1;
+               /* break; */
+       case -2:
+               bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
+               bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
+               neg=1;
+               break;
+       case -1:
+       case 0:
+       case 1:
+               zero=1;
+               /* break; */
+       case 2:
+               bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
+               bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
+               neg=1;
+               break;
+       case 3:
+               zero=1;
+               /* break; */
+       case 4:
+               bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
+               bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
+               break;
+               }
+               /* The zero case isn't yet implemented here. The speedup
+                  would probably be negligible. */
+# if 0
+       if (n == 4)
                {
                bn_mul_comba4(&(t[n2]),t,&(t[n]));
                bn_mul_comba4(r,a,b);
                bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
                memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
                }
-       else */ if (n == 8)
+       else
+# endif
+       if (n == 8)
                {
                bn_mul_comba8(&(t[n2]),t,&(t[n]));
                bn_mul_comba8(r,a,b);
-               bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
-               memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
+               bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
+               memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb));
                }
        else
                {
                p= &(t[n2*2]);
-               bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p);
-               bn_mul_recursive(r,a,b,n,p);
+               bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
+               bn_mul_recursive(r,a,b,n,0,0,p);
                i=n/2;
                /* If there is only a bottom half to the number,
                 * just do it */
-               j=tn-i;
+               if (tna > tnb)
+                       j = tna - i;
+               else
+                       j = tnb - i;
                if (j == 0)
                        {
-                       bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),i,p);
+                       bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),
+                               i,tna-i,tnb-i,p);
                        memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
                        }
                else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
                                {
                                bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
-                                       j,i,p);
-                               memset(&(r[n2+tn*2]),0,
-                                       sizeof(BN_ULONG)*(n2-tn*2));
+                                       i,tna-i,tnb-i,p);
+                               memset(&(r[n2+tna+tnb]),0,
+                                       sizeof(BN_ULONG)*(n2-tna-tnb));
                                }
                else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
                        {
                        memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
-                       if (tn < BN_MUL_RECURSIVE_SIZE_NORMAL)
+                       if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
+                               && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL)
                                {
-                               bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
+                               bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
                                }
                        else
                                {
                                for (;;)
                                        {
                                        i/=2;
-                                       if (i < tn)
+                                       /* these simplified conditions work
+                                        * exclusively because difference
+                                        * between tna and tnb is 1 or 0 */
+                                       if (i < tna || i < tnb)
                                                {
                                                bn_mul_part_recursive(&(r[n2]),
                                                        &(a[n]),&(b[n]),
-                                                       tn-i,i,p);
+                                                       i,tna-i,tnb-i,p);
                                                break;
                                                }
-                                       else if (i == tn)
+                                       else if (i == tna || i == tnb)
                                                {
                                                bn_mul_recursive(&(r[n2]),
                                                        &(a[n]),&(b[n]),
-                                                       i,p);
+                                                       i,tna-i,tnb-i,p);
                                                break;
                                                }
                                        }
@@ -308,7 +685,16 @@ printf(" bn_mul_part_recursive %d * %d\n",tn+n,tn+n);
         */
 
        c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
-       c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
+
+       if (neg) /* if t[32] is negative */
+               {
+               c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
+               }
+       else
+               {
+               /* Might have a carry */
+               c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
+               }
 
        /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
         * r[10] holds (a[0]*b[0])
@@ -325,7 +711,7 @@ printf(" bn_mul_part_recursive %d * %d\n",tn+n,tn+n);
 
                /* The overflow will stop before we over write
                 * words we should not overwrite */
-               if (ln < c1)
+               if (ln < (BN_ULONG)c1)
                        {
                        do      {
                                p++;
@@ -345,11 +731,11 @@ void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
        {
        int n=n2/2;
 
-#ifdef BN_COUNT
-printf(" bn_mul_low_recursive %d * %d\n",n2,n2);
-#endif
+# ifdef BN_COUNT
+       fprintf(stderr," bn_mul_low_recursive %d * %d\n",n2,n2);
+# endif
 
-       bn_mul_recursive(r,a,b,n,&(t[0]));
+       bn_mul_recursive(r,a,b,n,0,0,&(t[0]));
        if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
                {
                bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
@@ -379,9 +765,9 @@ void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
        int neg,oneg,zero;
        BN_ULONG ll,lc,*lp,*mp;
 
-#ifdef BN_COUNT
-printf(" bn_mul_high %d * %d\n",n2,n2);
-#endif
+# ifdef BN_COUNT
+       fprintf(stderr," bn_mul_high %d * %d\n",n2,n2);
+# endif
        n=n2/2;
 
        /* Calculate (al-ah)*(bh-bl) */
@@ -424,17 +810,17 @@ printf(" bn_mul_high %d * %d\n",n2,n2);
        oneg=neg;
        /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
        /* r[10] = (a[1]*b[1]) */
-#ifdef BN_MUL_COMBA
+# ifdef BN_MUL_COMBA
        if (n == 8)
                {
                bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
                bn_mul_comba8(r,&(a[n]),&(b[n]));
                }
        else
-#endif
+# endif
                {
-               bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,&(t[n2]));
-               bn_mul_recursive(r,&(a[n]),&(b[n]),n,&(t[n2]));
+               bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2]));
+               bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2]));
                }
 
        /* s0 == low(al*bl)
@@ -555,19 +941,23 @@ printf(" bn_mul_high %d * %d\n",n2,n2);
                        }
                }
        }
-#endif
+#endif /* BN_RECURSION */
 
-int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx)
+int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
        {
+       int ret=0;
        int top,al,bl;
        BIGNUM *rr;
+#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
+       int i;
+#endif
 #ifdef BN_RECURSION
-       BIGNUM *t;
-       int i,j,k;
+       BIGNUM *t=NULL;
+       int j=0,k;
 #endif
 
 #ifdef BN_COUNT
-printf("BN_mul %d * %d\n",a->top,b->top);
+       fprintf(stderr,"BN_mul %d * %d\n",a->top,b->top);
 #endif
 
        bn_check_top(a);
@@ -576,7 +966,6 @@ printf("BN_mul %d * %d\n",a->top,b->top);
 
        al=a->top;
        bl=b->top;
-       r->neg=a->neg^b->neg;
 
        if ((al == 0) || (bl == 0))
                {
@@ -592,115 +981,129 @@ printf("BN_mul %d * %d\n",a->top,b->top);
                }
        else
                rr = r;
+       rr->neg=a->neg^b->neg;
 
 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
-       if (al == bl)
+       i = al-bl;
+#endif
+#ifdef BN_MUL_COMBA
+       if (i == 0)
                {
-#  ifdef BN_MUL_COMBA
-/*             if (al == 4)
+# if 0
+               if (al == 4)
                        {
                        if (bn_wexpand(rr,8) == NULL) goto err;
                        rr->top=8;
                        bn_mul_comba4(rr->d,a->d,b->d);
                        goto end;
                        }
-               else */ if (al == 8)
+# endif
+               if (al == 8)
                        {
                        if (bn_wexpand(rr,16) == NULL) goto err;
                        rr->top=16;
                        bn_mul_comba8(rr->d,a->d,b->d);
                        goto end;
                        }
-               else
-#  endif
+               }
+#endif /* BN_MUL_COMBA */
 #ifdef BN_RECURSION
-               if (al < BN_MULL_SIZE_NORMAL)
-#endif
+       if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
+               {
+               if (i >= -1 && i <= 1)
                        {
-                       if (bn_wexpand(rr,top) == NULL) goto err;
+                       int sav_j =0;
+                       /* Find out the power of two lower or equal
+                          to the longest of the two numbers */
+                       if (i >= 0)
+                               {
+                               j = BN_num_bits_word((BN_ULONG)al);
+                               }
+                       if (i == -1)
+                               {
+                               j = BN_num_bits_word((BN_ULONG)bl);
+                               }
+                       sav_j = j;
+                       j = 1<<(j-1);
+                       assert(j <= al || j <= bl);
+                       k = j+j;
+                       t = BN_CTX_get(ctx);
+                       if (t == NULL)
+                               goto err;
+                       if (al > j || bl > j)
+                               {
+                               if (bn_wexpand(t,k*4) == NULL) goto err;
+                               if (bn_wexpand(rr,k*4) == NULL) goto err;
+                               bn_mul_part_recursive(rr->d,a->d,b->d,
+                                       j,al-j,bl-j,t->d);
+                               }
+                       else    /* al <= j || bl <= j */
+                               {
+                               if (bn_wexpand(t,k*2) == NULL) goto err;
+                               if (bn_wexpand(rr,k*2) == NULL) goto err;
+                               bn_mul_recursive(rr->d,a->d,b->d,
+                                       j,al-j,bl-j,t->d);
+                               }
                        rr->top=top;
-                       bn_mul_normal(rr->d,a->d,al,b->d,bl);
                        goto end;
                        }
-#  ifdef BN_RECURSION
-               goto symmetric;
-#  endif
-               }
-#endif
-#ifdef BN_RECURSION
-       else if ((al < BN_MULL_SIZE_NORMAL) || (bl < BN_MULL_SIZE_NORMAL))
-               {
-               if (bn_wexpand(rr,top) == NULL) goto err;
-               rr->top=top;
-               bn_mul_normal(rr->d,a->d,al,b->d,bl);
-               goto end;
-               }
-       else
-               {
-               i=(al-bl);
-               if ((i ==  1) && !BN_get_flags(b,BN_FLG_STATIC_DATA))
+#if 0
+               if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA))
                        {
-                       bn_wexpand(b,al);
-                       b->d[bl]=0;
+                       BIGNUM *tmp_bn = (BIGNUM *)b;
+                       if (bn_wexpand(tmp_bn,al) == NULL) goto err;
+                       tmp_bn->d[bl]=0;
                        bl++;
-                       goto symmetric;
+                       i--;
                        }
-               else if ((i ==  -1) && !BN_get_flags(a,BN_FLG_STATIC_DATA))
+               else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA))
                        {
-                       bn_wexpand(a,bl);
-                       a->d[al]=0;
+                       BIGNUM *tmp_bn = (BIGNUM *)a;
+                       if (bn_wexpand(tmp_bn,bl) == NULL) goto err;
+                       tmp_bn->d[al]=0;
                        al++;
-                       goto symmetric;
+                       i++;
+                       }
+               if (i == 0)
+                       {
+                       /* symmetric and > 4 */
+                       /* 16 or larger */
+                       j=BN_num_bits_word((BN_ULONG)al);
+                       j=1<<(j-1);
+                       k=j+j;
+                       t = BN_CTX_get(ctx);
+                       if (al == j) /* exact multiple */
+                               {
+                               if (bn_wexpand(t,k*2) == NULL) goto err;
+                               if (bn_wexpand(rr,k*2) == NULL) goto err;
+                               bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
+                               }
+                       else
+                               {
+                               if (bn_wexpand(t,k*4) == NULL) goto err;
+                               if (bn_wexpand(rr,k*4) == NULL) goto err;
+                               bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
+                               }
+                       rr->top=top;
+                       goto end;
                        }
-               }
 #endif
-
-       /* asymmetric and >= 4 */ 
+               }
+#endif /* BN_RECURSION */
        if (bn_wexpand(rr,top) == NULL) goto err;
        rr->top=top;
        bn_mul_normal(rr->d,a->d,al,b->d,bl);
 
-#ifdef BN_RECURSION
-       if (0)
-               {
-symmetric:
-               /* symmetric and > 4 */
-               /* 16 or larger */
-               j=BN_num_bits_word((BN_ULONG)al);
-               j=1<<(j-1);
-               k=j+j;
-               t = BN_CTX_get(ctx);
-               if (al == j) /* exact multiple */
-                       {
-                       bn_wexpand(t,k*2);
-                       bn_wexpand(rr,k*2);
-                       bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
-                       }
-               else
-                       {
-                       bn_wexpand(a,k);
-                       bn_wexpand(b,k);
-                       bn_wexpand(t,k*4);
-                       bn_wexpand(rr,k*4);
-                       for (i=a->top; i<k; i++)
-                               a->d[i]=0;
-                       for (i=b->top; i<k; i++)
-                               b->d[i]=0;
-                       bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
-                       }
-               rr->top=top;
-               }
-#endif
 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
 end:
 #endif
-       bn_fix_top(rr);
+       bn_correct_top(rr);
        if (r != rr) BN_copy(r,rr);
-       BN_CTX_end(ctx);
-       return(1);
+       ret=1;
 err:
+       bn_check_top(r);
        BN_CTX_end(ctx);
-       return(0);
+       return(ret);
        }
 
 void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
@@ -708,7 +1111,7 @@ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
        BN_ULONG *rr;
 
 #ifdef BN_COUNT
-printf(" bn_mul_normal %d * %d\n",na,nb);
+       fprintf(stderr," bn_mul_normal %d * %d\n",na,nb);
 #endif
 
        if (na < nb)
@@ -721,7 +1124,13 @@ printf(" bn_mul_normal %d * %d\n",na,nb);
 
                }
        rr= &(r[na]);
-       rr[0]=bn_mul_words(r,a,na,b[0]);
+       if (nb <= 0)
+               {
+               (void)bn_mul_words(r,a,na,0);
+               return;
+               }
+       else
+               rr[0]=bn_mul_words(r,a,na,b[0]);
 
        for (;;)
                {
@@ -742,7 +1151,7 @@ printf(" bn_mul_normal %d * %d\n",na,nb);
 void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
        {
 #ifdef BN_COUNT
-printf(" bn_mul_low_normal %d * %d\n",n,n);
+       fprintf(stderr," bn_mul_low_normal %d * %d\n",n,n);
 #endif
        bn_mul_words(r,a,n,b[0]);
 
@@ -760,4 +1169,3 @@ printf(" bn_mul_low_normal %d * %d\n",n,n);
                b+=4;
                }
        }
-