Totally remove the supposedly 'faster' variant in
[openssl.git] / crypto / bn / bn_sqr.c
index a8464610e5af8fc4f650cf9d93163f22cc7b6a36..75f4f38392dcb686f005800652c23de64ea76efd 100644 (file)
 
 /* r must not be a */
 /* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96 */
-int BN_sqr(r, a, ctx)
-BIGNUM *r;
-BIGNUM *a;
-BN_CTX *ctx;
+int BN_sqr(BIGNUM *r, BIGNUM *a, BN_CTX *ctx) /* r = a*a; returns 1 on success (also when a has no words, with r->top=0), 0 on error */
        {
-       int i,j,max,al;
-       BIGNUM *tmp;
-       BN_ULONG *ap,*rp;
+       int max,al;
+       int ret = 0; /* stays 0 on every 'goto err' path */
+       BIGNUM *tmp,*rr;
 
-       tmp=ctx->bn[ctx->tos];
+#ifdef BN_COUNT
+printf("BN_sqr %d * %d\n",a->top,a->top);
+#endif
+       bn_check_top(a);
 
        al=a->top;
-       if (al == 0)
+       if (al <= 0)
                {
                r->top=0;
                return(1);
                }
 
-       max=(al*2);
-       if (bn_wexpand(r,1+max) == NULL) return(0);
-       if (bn_wexpand(tmp,1+max) == NULL) return(0);
+       BN_CTX_start(ctx);
+       rr=(a != r) ? r : BN_CTX_get(ctx); /* when r aliases a, build the result in a BIGNUM taken from ctx and copy it back at the end */
+       tmp=BN_CTX_get(ctx);
+       if (tmp == NULL) goto err; /* only tmp is checked - NOTE(review): presumably a failed BN_CTX_get for rr makes this call fail too; confirm */
+
+       max=(al+al); /* squaring al words yields at most 2*al words */
+       if (bn_wexpand(rr,max+1) == NULL) goto err;
 
        r->neg=0;
+       if (al == 4)
+               {
+#ifndef BN_SQR_COMBA
+               BN_ULONG t[8];
+               bn_sqr_normal(rr->d,a->d,4,t);
+#else
+               bn_sqr_comba4(rr->d,a->d);
+#endif
+               }
+       else if (al == 8)
+               {
+#ifndef BN_SQR_COMBA
+               BN_ULONG t[16];
+               bn_sqr_normal(rr->d,a->d,8,t);
+#else
+               bn_sqr_comba8(rr->d,a->d);
+#endif
+               }
+       else 
+               {
+#if defined(BN_RECURSION)
+               if (al < BN_SQR_RECURSIVE_SIZE_NORMAL)
+                       {
+                       BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL*2]; /* bn_sqr_normal needs 2*al words of scratch; this is enough because al < BN_SQR_RECURSIVE_SIZE_NORMAL */
+                       bn_sqr_normal(rr->d,a->d,al,t);
+                       }
+               else
+                       {
+                       int j,k;
+
+                       j=BN_num_bits_word((BN_ULONG)al);
+                       j=1<<(j-1); /* presumably the largest power of two <= al (if BN_num_bits_word returns the bit length of al) */
+                       k=j+j;
+                       if (al == j) /* i.e. al is a power of two, as bn_sqr_recursive requires */
+                               {
+                               if (bn_wexpand(a,k*2) == NULL) goto err; /* NOTE(review): grows the input a although bn_sqr_recursive appears to read only a->d[0..al) - confirm this is needed */
+                               if (bn_wexpand(tmp,k*2) == NULL) goto err; /* 4*al words: bn_sqr_recursive uses t[0..2*n2) itself and passes &t[2*n2] to its recursive calls */
+                               bn_sqr_recursive(rr->d,a->d,al,tmp->d);
+                               }
+                       else
+                               {
+                               if (bn_wexpand(tmp,max) == NULL) goto err; /* bn_sqr_normal needs 2*al words of scratch */
+                               bn_sqr_normal(rr->d,a->d,al,tmp->d);
+                               }
+                       }
+#else
+               if (bn_wexpand(tmp,max) == NULL) goto err; /* bn_sqr_normal needs 2*al words of scratch */
+               bn_sqr_normal(rr->d,a->d,al,tmp->d);
+#endif
+               }
 
-       ap=a->d;
-       rp=r->d;
+       rr->top=max;
+       if ((max > 0) && (rr->d[max-1] == 0)) rr->top--; /* drop the top word when it is zero */
+       if (rr != r) BN_copy(r,rr); /* NOTE(review): BN_copy's return value is not checked - confirm it cannot fail here */
+       ret = 1;
+ err:
+       BN_CTX_end(ctx);
+       return(ret);
+       }
+
+/* r (2*n words) receives the square of a (n words); tmp must have 2*n words (it holds the bn_sqr_words output that is added into r) */
+void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp)
+       {
+       int i,j,max;
+       BN_ULONG *ap,*rp;
+
+       max=n*2; /* number of result words */
+       ap=a;
+       rp=r;
        rp[0]=rp[max-1]=0;
        rp++;
-       j=al;
+       j=n;
 
        if (--j > 0)
                {
@@ -99,7 +169,7 @@ BN_CTX *ctx;
                rp+=2;
                }
 
-       for (i=2; i<al; i++)
+       for (i=n-2; i>0; i--)
                {
                j--;
                ap++;
@@ -107,16 +177,112 @@ BN_CTX *ctx;
                rp+=2;
                }
 
-       bn_add_words(r->d,r->d,r->d,max);
+       bn_add_words(r,r,r,max); /* r += r: doubles whatever the loops above accumulated (presumably the products a[i]*a[j] with i != j, each needed twice - loop bodies not shown here) */
 
        /* There will not be a carry */
 
-       bn_sqr_words(tmp->d,a->d,al);
-
-       bn_add_words(r->d,r->d,tmp->d,max);
+       bn_sqr_words(tmp,a,n); /* presumably writes the square of each word a[i] into tmp as a two-word value - confirm against bn_sqr_words */
 
-       r->top=max;
-       if (r->d[max-1] == 0) r->top--;
-       return(1);
+       bn_add_words(r,r,tmp,max); /* add the 2*n words of tmp into the doubled sum; setting top/trimming is now left to the caller */
        }
 
+#ifdef BN_RECURSION
+/* Recursive squaring, r = a*a: r is 2*n2 words in size, a is n2 words.
+ * n2 must be a power of 2.
+ * t is scratch: this level uses t[0 .. 2*n2-1] and passes &t[2*n2] down to
+ * the recursive calls, so t must be larger than 2*n2 words whenever the
+ * function recurses (BN_sqr asks bn_wexpand for 4*n2 words).
+ * With n = n2/2, a[0] the low n words and a[1] the high n words, we calculate
+ * a[0]*a[0]
+ * a[0]*a[0]+a[1]*a[1]-(a[0]-a[1])*(a[0]-a[1])   (i.e. 2*a[0]*a[1])
+ * a[1]*a[1]
+ */
+void bn_sqr_recursive(BN_ULONG *r, BN_ULONG *a, int n2, BN_ULONG *t)
+       {
+       int n=n2/2;
+       int zero,c1;
+       BN_ULONG ln,lo,*p;
+
+#ifdef BN_COUNT
+printf(" bn_sqr_recursive %d * %d\n",n2,n2);
+#endif
+       if (n2 == 4)
+               {
+#ifndef BN_SQR_COMBA
+               bn_sqr_normal(r,a,4,t);
+#else
+               bn_sqr_comba4(r,a);
+#endif
+               return;
+               }
+       else if (n2 == 8)
+               {
+#ifndef BN_SQR_COMBA
+               bn_sqr_normal(r,a,8,t);
+#else
+               bn_sqr_comba8(r,a);
+#endif
+               return;
+               }
+       if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL)
+               {
+               bn_sqr_normal(r,a,n2,t);
+               return;
+               }
+       /* t[0 .. n-1] = |a[0]-a[1]|: subtract the smaller half from the larger */
+       c1=bn_cmp_words(a,&(a[n]),n);
+       zero=0;
+       if (c1 > 0)
+               bn_sub_words(t,a,&(a[n]),n);
+       else if (c1 < 0)
+               bn_sub_words(t,&(a[n]),a,n);
+       else
+               zero=1;
+
+       /* (a[0]-a[1])*(a[1]-a[0]) is always negative unless it is zero, so we
+        * square the magnitude here and subtract it further down */
+       p= &(t[n2*2]); /* scratch area handed to the recursive calls */
+
+       if (!zero)
+               bn_sqr_recursive(&(t[n2]),t,n,p);
+       else
+               memset(&(t[n2]),0,n*sizeof(BN_ULONG)); /* NOTE(review): clears n words, but the bn_sub_words below reads n2 words from &t[n2] - looks like this should clear n2 words; confirm */
+       bn_sqr_recursive(r,a,n,p);
+       bn_sqr_recursive(&(r[n2]),&(a[n]),n,p);
+
+       /* t[32] holds (a[0]-a[1])*(a[0]-a[1]), the magnitude of the negative cross term
+        * r[10] holds (a[0]*a[0])
+        * r[32] holds (a[1]*a[1])
+        */
+
+       c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
+
+       /* t[32] is subtracted, i.e. the negative (a[0]-a[1])*(a[1]-a[0]) is added */
+       c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
+
+       /* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
+        * r[10] holds (a[0]*a[0])
+        * r[32] holds (a[1]*a[1])
+        * c1 holds the carry bits
+        */
+       c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
+       if (c1)
+               {
+               p= &(r[n+n2]);
+               lo= *p;
+               ln=(lo+c1)&BN_MASK2;
+               *p=ln;
+
+               /* Propagate the carry upwards; it will stop before we
+                * overwrite words we should not overwrite */
+               if (ln < (BN_ULONG)c1)
+                       {
+                       do      {
+                               p++;
+                               lo= *p;
+                               ln=(lo+1)&BN_MASK2;
+                               *p=ln;
+                               } while (ln == 0);
+                       }
+               }
+       }
+#endif