ARM assembler pack: engage newly introduced armv4-gf2m module.

[openssl.git] / crypto / bn / bn_gf2m.c
diff --git a/crypto/bn/bn_gf2m.c b/crypto/bn/bn_gf2m.c

index 0bb4f9b2515f62a43c4d0e2ad74466df92c4d765..19a101bccdaf429f8f0c581801e67211bb35f368 100644 (file)
--- a/crypto/bn/bn_gf2m.c
+++ b/crypto/bn/bn_gf2m.c
@@ -88,12 +88,16 @@
   *
   */
  
+#define OPENSSL_FIPSAPI
+
  #include <assert.h>
  #include <limits.h>
  #include <stdio.h>
  #include "cryptlib.h"
  #include "bn_lcl.h"
  
+#ifndef OPENSSL_NO_EC2M
+
  /* Maximum number of iterations before BN_GF2m_mod_solve_quad_arr should fail. */
  #define MAX_ITERATIONS 50
  
@@ -121,74 +125,13 @@ static const BN_ULONG SQR_tb[16] =
      SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >>  8 & 0xF] << 16 | \
      SQR_tb[(w) >>  4 & 0xF] <<  8 | SQR_tb[(w)       & 0xF]
  #endif
-#ifdef SIXTEEN_BIT
-#define SQR1(w) \
-    SQR_tb[(w) >> 12 & 0xF] <<  8 | SQR_tb[(w) >>  8 & 0xF]
-#define SQR0(w) \
-    SQR_tb[(w) >>  4 & 0xF] <<  8 | SQR_tb[(w)       & 0xF]
-#endif
-#ifdef EIGHT_BIT
-#define SQR1(w) \
-    SQR_tb[(w) >>  4 & 0xF]
-#define SQR0(w) \
-    SQR_tb[(w)       & 15]
-#endif
  
+#if !defined(OPENSSL_BN_ASM_GF2m)
  /* Product of two polynomials a, b each with degree < BN_BITS2 - 1,
   * result is a polynomial r with degree < 2 * BN_BITS - 1
   * The caller MUST ensure that the variables have the right amount
   * of space allocated.
   */
-#ifdef EIGHT_BIT
-static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
-       {
-       register BN_ULONG h, l, s;
-       BN_ULONG tab[4], top1b = a >> 7;
-       register BN_ULONG a1, a2;
-
-       a1 = a & (0x7F); a2 = a1 << 1;
-
-       tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2;
-
-       s = tab[b      & 0x3]; l  = s;
-       s = tab[b >> 2 & 0x3]; l ^= s << 2; h  = s >> 6;
-       s = tab[b >> 4 & 0x3]; l ^= s << 4; h ^= s >> 4;
-       s = tab[b >> 6      ]; l ^= s << 6; h ^= s >> 2;
-       
-       /* compensate for the top bit of a */
-
-       if (top1b & 01) { l ^= b << 7; h ^= b >> 1; } 
-
-       *r1 = h; *r0 = l;
-       } 
-#endif
-#ifdef SIXTEEN_BIT
-static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
-       {
-       register BN_ULONG h, l, s;
-       BN_ULONG tab[4], top1b = a >> 15; 
-       register BN_ULONG a1, a2;
-
-       a1 = a & (0x7FFF); a2 = a1 << 1;
-
-       tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2;
-
-       s = tab[b      & 0x3]; l  = s;
-       s = tab[b >> 2 & 0x3]; l ^= s <<  2; h  = s >> 14;
-       s = tab[b >> 4 & 0x3]; l ^= s <<  4; h ^= s >> 12;
-       s = tab[b >> 6 & 0x3]; l ^= s <<  6; h ^= s >> 10;
-       s = tab[b >> 8 & 0x3]; l ^= s <<  8; h ^= s >>  8;
-       s = tab[b >>10 & 0x3]; l ^= s << 10; h ^= s >>  6;
-       s = tab[b >>12 & 0x3]; l ^= s << 12; h ^= s >>  4;
-       s = tab[b >>14      ]; l ^= s << 14; h ^= s >>  2;
-
-       /* compensate for the top bit of a */
-
-       if (top1b & 01) { l ^= b << 15; h ^= b >> 1; } 
-
-       *r1 = h; *r0 = l;
-       } 
-#endif
  #ifdef THIRTY_TWO_BIT
  static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
         {
@@ -228,7 +171,7 @@ static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const
         BN_ULONG tab[16], top3b = a >> 61;
         register BN_ULONG a1, a2, a4, a8;
  
-       a1 = a & (0x1FFFFFFFFFFFFFFF); a2 = a1 << 1; a4 = a2 << 1; a8 = a4 << 1;
+       a1 = a & (0x1FFFFFFFFFFFFFFFULL); a2 = a1 << 1; a4 = a2 << 1; a8 = a4 << 1;
  
         tab[ 0] = 0;     tab[ 1] = a1;       tab[ 2] = a2;       tab[ 3] = a1^a2;
         tab[ 4] = a4;    tab[ 5] = a1^a4;    tab[ 6] = a2^a4;    tab[ 7] = a1^a2^a4;
@@ -278,7 +221,9 @@ static void bn_GF2m_mul_2x2(BN_ULONG *r, const BN_ULONG a1, const BN_ULONG a0, c
         r[2] ^= m1 ^ r[1] ^ r[3];  /* h0 ^= m1 ^ l1 ^ h1; */
         r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0;  /* l1 ^= l0 ^ h0 ^ m0; */
         }
-
+#else
+void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0);
+#endif 
  
  /* Add polynomials a and b and store result in r; r could be a or b, a and b 
   * could be equal; r is the bitwise XOR of a and b.
@@ -288,10 +233,14 @@ int       BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
         int i;
         const BIGNUM *at, *bt;
  
+       bn_check_top(a);
+       bn_check_top(b);
+
         if (a->top < b->top) { at = b; bt = a; }
         else { at = a; bt = b; }
  
-       bn_wexpand(r, at->top);
+       if(bn_wexpand(r, at->top) == NULL)
+               return 0;
  
         for (i = 0; i < bt->top; i++)
                 {
@@ -317,14 +266,23 @@ int       BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
  
  
  /* Performs modular reduction of a and stores the result in r.  r could be a. */
-int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[])
+int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[])
         {
         int j, k;
         int n, dN, d0, d1;
         BN_ULONG zz, *z;
-       
-       /* Since the algorithm does reduction in the r value, if a != r, copy the
-        * contents of a into r so we can do reduction in r. 
+
+       bn_check_top(a);
+
+       if (!p[0])
+               {
+               /* reduction mod 1 => result is 0 */
+               BN_zero(r);
+               return 1;
+               }
+
+       /* Since the algorithm does reduction in the r value, if a != r, copy
+        * the contents of a into r so we can do reduction in r. 
          */
         if (a != r)
                 {
@@ -345,7 +303,7 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[])
                 if (z[j] == 0) { j--; continue; }
                 z[j] = 0;
  
-               for (k = 1; p[k] > 0; k++)
+               for (k = 1; p[k] != 0; k++)
                         {
                         /* reducing component t^p[k] */
                         n = p[0] - p[k];
@@ -372,10 +330,14 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[])
                 if (zz == 0) break;
                 d1 = BN_BITS2 - d0;
                 
-               if (d0) z[dN] = (z[dN] << d1) >> d1; /* clear up the top d1 bits */
+               /* clear up the top d1 bits */
+               if (d0)
+                       z[dN] = (z[dN] << d1) >> d1;
+               else
+                       z[dN] = 0;
                 z[0] ^= zz; /* reduction t^0 component */
  
-               for (k = 1; p[k] > 0; k++)
+               for (k = 1; p[k] != 0; k++)
                         {
                         BN_ULONG tmp_ulong;
  
@@ -393,7 +355,6 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[])
                 }
  
         bn_correct_top(r);
-       
         return 1;
         }
  
@@ -405,18 +366,18 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[])
   */
  int    BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p)
         {
-       const int max = BN_num_bits(p);
-       unsigned int *arr=NULL, ret = 0;
-       if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
-       if (BN_GF2m_poly2arr(p, arr, max) > max)
+       int ret = 0;
+       int arr[6];
+       bn_check_top(a);
+       bn_check_top(p);
+       ret = BN_GF2m_poly2arr(p, arr, sizeof(arr)/sizeof(arr[0]));
+       if (!ret || ret > (int)(sizeof(arr)/sizeof(arr[0])))
                 {
                 BNerr(BN_F_BN_GF2M_MOD,BN_R_INVALID_LENGTH);
-               goto err;
+               return 0;
                 }
         ret = BN_GF2m_mod_arr(r, a, arr);
         bn_check_top(r);
-  err:
-       if (arr) OPENSSL_free(arr);
         return ret;
         }
  
@@ -424,17 +385,19 @@ int       BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p)
  /* Compute the product of two polynomials a and b, reduce modulo p, and store
   * the result in r.  r could be a or b; a could be b.
   */
-int    BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx)
+int    BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx)
         {
         int zlen, i, j, k, ret = 0;
         BIGNUM *s;
         BN_ULONG x1, x0, y1, y0, zz[4];
-       
+
+       bn_check_top(a);
+       bn_check_top(b);
+
         if (a == b)
                 {
                 return BN_GF2m_mod_sqr_arr(r, a, p, ctx);
                 }
-       
  
         BN_CTX_start(ctx);
         if ((s = BN_CTX_get(ctx)) == NULL) goto err;
@@ -459,14 +422,13 @@ int       BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsig
                 }
  
         bn_correct_top(s);
-       BN_GF2m_mod_arr(r, s, p);
+       if (BN_GF2m_mod_arr(r, s, p))
+               ret = 1;
         bn_check_top(r);
-       ret = 1;
  
-  err:
+err:
         BN_CTX_end(ctx);
         return ret;
-       
         }
  
  /* Compute the product of two polynomials a and b, reduce modulo p, and store
@@ -478,28 +440,34 @@ int       BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsig
   */
  int    BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx)
         {
-       const int max = BN_num_bits(p);
-       unsigned int *arr=NULL, ret = 0;
-       if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
-       if (BN_GF2m_poly2arr(p, arr, max) > max)
+       int ret = 0;
+       const int max = BN_num_bits(p) + 1;
+       int *arr=NULL;
+       bn_check_top(a);
+       bn_check_top(b);
+       bn_check_top(p);
+       if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
+       ret = BN_GF2m_poly2arr(p, arr, max);
+       if (!ret || ret > max)
                 {
                 BNerr(BN_F_BN_GF2M_MOD_MUL,BN_R_INVALID_LENGTH);
                 goto err;
                 }
         ret = BN_GF2m_mod_mul_arr(r, a, b, arr, ctx);
         bn_check_top(r);
-  err:
+err:
         if (arr) OPENSSL_free(arr);
         return ret;
         }
  
  
  /* Square a, reduce the result mod p, and store it in r.  r could be a. */
-int    BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx)
+int    BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx)
         {
         int i, ret = 0;
         BIGNUM *s;
-       
+
+       bn_check_top(a);
         BN_CTX_start(ctx);
         if ((s = BN_CTX_get(ctx)) == NULL) return 0;
         if (!bn_wexpand(s, 2 * a->top)) goto err;
@@ -515,7 +483,7 @@ int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_C
         if (!BN_GF2m_mod_arr(r, s, p)) goto err;
         bn_check_top(r);
         ret = 1;
-  err:
+err:
         BN_CTX_end(ctx);
         return ret;
         }
@@ -528,17 +496,22 @@ int       BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_C
   */
  int    BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
         {
-       const int max = BN_num_bits(p);
-       unsigned int *arr=NULL, ret = 0;
-       if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
-       if (BN_GF2m_poly2arr(p, arr, max) > max)
+       int ret = 0;
+       const int max = BN_num_bits(p) + 1;
+       int *arr=NULL;
+
+       bn_check_top(a);
+       bn_check_top(p);
+       if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
+       ret = BN_GF2m_poly2arr(p, arr, max);
+       if (!ret || ret > max)
                 {
                 BNerr(BN_F_BN_GF2M_MOD_SQR,BN_R_INVALID_LENGTH);
                 goto err;
                 }
         ret = BN_GF2m_mod_sqr_arr(r, a, arr, ctx);
         bn_check_top(r);
-  err:
+err:
         if (arr) OPENSSL_free(arr);
         return ret;
         }
@@ -554,25 +527,23 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
         BIGNUM *b, *c, *u, *v, *tmp;
         int ret = 0;
  
+       bn_check_top(a);
+       bn_check_top(p);
+
         BN_CTX_start(ctx);
         
-       b = BN_CTX_get(ctx);
-       c = BN_CTX_get(ctx);
-       u = BN_CTX_get(ctx);
-       v = BN_CTX_get(ctx);
-       if (v == NULL) goto err;
+       if ((b = BN_CTX_get(ctx))==NULL) goto err;
+       if ((c = BN_CTX_get(ctx))==NULL) goto err;
+       if ((u = BN_CTX_get(ctx))==NULL) goto err;
+       if ((v = BN_CTX_get(ctx))==NULL) goto err;
  
-       if (!BN_one(b)) goto err;
-       if (!BN_zero(c)) goto err;
         if (!BN_GF2m_mod(u, a, p)) goto err;
-       if (!BN_copy(v, p)) goto err;
-
-       u->neg = 0; /* Need to set u->neg = 0 because BN_is_one(u) checks
-                    * the neg flag of the bignum.
-                    */
-
         if (BN_is_zero(u)) goto err;
  
+       if (!BN_copy(v, p)) goto err;
+#if 0
+       if (!BN_one(b)) goto err;
+
         while (1)
                 {
                 while (!BN_is_odd(u))
@@ -585,7 +556,7 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
                         if (!BN_rshift1(b, b)) goto err;
                         }
  
-               if (BN_is_one(u)) break;
+               if (BN_abs_is_word(u, 1)) break;
  
                 if (BN_num_bits(u) < BN_num_bits(v))
                         {
@@ -596,13 +567,81 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
                 if (!BN_GF2m_add(u, u, v)) goto err;
                 if (!BN_GF2m_add(b, b, c)) goto err;
                 }
+#else
+       {
+       int i,  ubits = BN_num_bits(u),
+               vbits = BN_num_bits(v), /* v is copy of p */
+               top = p->top;
+       BN_ULONG *udp,*bdp,*vdp,*cdp;
+
+       bn_wexpand(u,top);      udp = u->d;
+                               for (i=u->top;i<top;i++) udp[i] = 0;
+                               u->top = top;
+       bn_wexpand(b,top);      bdp = b->d;
+                               bdp[0] = 1;
+                               for (i=1;i<top;i++) bdp[i] = 0;
+                               b->top = top;
+       bn_wexpand(c,top);      cdp = c->d;
+                               for (i=0;i<top;i++) cdp[i] = 0;
+                               c->top = top;
+       vdp = v->d;     /* It pays off to "cache" *->d pointers, because
+                        * it allows the optimizer to be more aggressive.
+                        * But we don't have to "cache" p->d, because *p
+                        * is declared 'const'... */
+       while (1)
+               {
+               while (ubits && !(udp[0]&1))
+                       {
+                       BN_ULONG u0,u1,b0,b1,mask;
+
+                       u0   = udp[0];
+                       b0   = bdp[0];
+                       mask = (BN_ULONG)0-(b0&1);
+                       b0  ^= p->d[0]&mask;
+                       for (i=0;i<top-1;i++)
+                               {
+                               u1 = udp[i+1];
+                               udp[i] = ((u0>>1)|(u1<<(BN_BITS2-1)))&BN_MASK2;
+                               u0 = u1;
+                               b1 = bdp[i+1]^(p->d[i+1]&mask);
+                               bdp[i] = ((b0>>1)|(b1<<(BN_BITS2-1)))&BN_MASK2;
+                               b0 = b1;
+                               }
+                       udp[i] = u0>>1;
+                       bdp[i] = b0>>1;
+                       ubits--;
+                       }
  
+               if (ubits<=BN_BITS2 && udp[0]==1) break;
+
+               if (ubits<vbits)
+                       {
+                       i = ubits; ubits = vbits; vbits = i;
+                       tmp = u; u = v; v = tmp;
+                       tmp = b; b = c; c = tmp;
+                       udp = vdp; vdp = v->d;
+                       bdp = cdp; cdp = c->d;
+                       }
+               for(i=0;i<top;i++)
+                       {
+                       udp[i] ^= vdp[i];
+                       bdp[i] ^= cdp[i];
+                       }
+               if (ubits==vbits)
+                       {
+                       bn_fix_top(u);
+                       ubits = BN_num_bits(u);
+                       }
+               }
+       bn_fix_top(b);
+       }
+#endif
  
         if (!BN_copy(r, b)) goto err;
         bn_check_top(r);
         ret = 1;
  
-  err:
+err:
         BN_CTX_end(ctx);
         return ret;
         }
@@ -613,11 +652,12 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
   * function is only provided for convenience; for best performance, use the 
   * BN_GF2m_mod_inv function.
   */
-int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx)
+int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const int p[], BN_CTX *ctx)
         {
         BIGNUM *field;
         int ret = 0;
  
+       bn_check_top(xx);
         BN_CTX_start(ctx);
         if ((field = BN_CTX_get(ctx)) == NULL) goto err;
         if (!BN_GF2m_arr2poly(p, field)) goto err;
@@ -625,7 +665,7 @@ int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const unsigned int p[], BN_
         ret = BN_GF2m_mod_inv(r, xx, field, ctx);
         bn_check_top(r);
  
-  err:
+err:
         BN_CTX_end(ctx);
         return ret;
         }
@@ -639,7 +679,11 @@ int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, const BIGNUM *p
         {
         BIGNUM *xinv = NULL;
         int ret = 0;
-       
+
+       bn_check_top(y);
+       bn_check_top(x);
+       bn_check_top(p);
+
         BN_CTX_start(ctx);
         xinv = BN_CTX_get(ctx);
         if (xinv == NULL) goto err;
@@ -649,7 +693,7 @@ int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, const BIGNUM *p
         bn_check_top(r);
         ret = 1;
  
-  err:
+err:
         BN_CTX_end(ctx);
         return ret;
         }
@@ -665,6 +709,10 @@ int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, const BIGNUM *p
         BIGNUM *a, *b, *u, *v;
         int ret = 0;
  
+       bn_check_top(y);
+       bn_check_top(x);
+       bn_check_top(p);
+
         BN_CTX_start(ctx);
         
         a = BN_CTX_get(ctx);
@@ -677,12 +725,7 @@ int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, const BIGNUM *p
         if (!BN_GF2m_mod(u, y, p)) goto err;
         if (!BN_GF2m_mod(a, x, p)) goto err;
         if (!BN_copy(b, p)) goto err;
-       if (!BN_zero(v)) goto err;
         
-       a->neg = 0; /* Need to set a->neg = 0 because BN_is_one(a) checks
-                    * the neg flag of the bignum.
-                    */
-
         while (!BN_is_odd(a))
                 {
                 if (!BN_rshift1(a, a)) goto err;
@@ -703,7 +746,7 @@ int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, const BIGNUM *p
                                 if (!BN_rshift1(v, v)) goto err;
                                 } while (!BN_is_odd(b));
                         }
-               else if (BN_is_one(a))
+               else if (BN_abs_is_word(a, 1))
                         break;
                 else
                         {
@@ -722,7 +765,7 @@ int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, const BIGNUM *p
         bn_check_top(r);
         ret = 1;
  
-  err:
+err:
         BN_CTX_end(ctx);
         return ret;
         }
@@ -735,11 +778,14 @@ int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, const BIGNUM *p
   * function is only provided for convenience; for best performance, use the 
   * BN_GF2m_mod_div function.
   */
-int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx)
+int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const int p[], BN_CTX *ctx)
         {
         BIGNUM *field;
         int ret = 0;
  
+       bn_check_top(yy);
+       bn_check_top(xx);
+
         BN_CTX_start(ctx);
         if ((field = BN_CTX_get(ctx)) == NULL) goto err;
         if (!BN_GF2m_arr2poly(p, field)) goto err;
@@ -747,7 +793,7 @@ int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const uns
         ret = BN_GF2m_mod_div(r, yy, xx, field, ctx);
         bn_check_top(r);
  
-  err:
+err:
         BN_CTX_end(ctx);
         return ret;
         }
@@ -757,16 +803,19 @@ int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const uns
   * the result in r.  r could be a.
   * Uses simple square-and-multiply algorithm A.5.1 from IEEE P1363.
   */
-int    BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx)
+int    BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx)
         {
         int ret = 0, i, n;
         BIGNUM *u;
-       
+
+       bn_check_top(a);
+       bn_check_top(b);
+
         if (BN_is_zero(b))
-               {
                 return(BN_one(r));
-               }
-       
+
+       if (BN_abs_is_word(b, 1))
+               return (BN_copy(r, a) != NULL);
  
         BN_CTX_start(ctx);
         if ((u = BN_CTX_get(ctx)) == NULL) goto err;
@@ -784,10 +833,8 @@ int        BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsig
                 }
         if (!BN_copy(r, u)) goto err;
         bn_check_top(r);
-
         ret = 1;
-
-  err:
+err:
         BN_CTX_end(ctx);
         return ret;
         }
@@ -801,17 +848,22 @@ int       BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsig
   */
  int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx)
         {
-       const int max = BN_num_bits(p);
-       unsigned int *arr=NULL, ret = 0;
-       if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
-       if (BN_GF2m_poly2arr(p, arr, max) > max)
+       int ret = 0;
+       const int max = BN_num_bits(p) + 1;
+       int *arr=NULL;
+       bn_check_top(a);
+       bn_check_top(b);
+       bn_check_top(p);
+       if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
+       ret = BN_GF2m_poly2arr(p, arr, max);
+       if (!ret || ret > max)
                 {
                 BNerr(BN_F_BN_GF2M_MOD_EXP,BN_R_INVALID_LENGTH);
                 goto err;
                 }
         ret = BN_GF2m_mod_exp_arr(r, a, b, arr, ctx);
         bn_check_top(r);
-  err:
+err:
         if (arr) OPENSSL_free(arr);
         return ret;
         }
@@ -820,20 +872,28 @@ int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p
   * the result in r.  r could be a.
   * Uses exponentiation as in algorithm A.4.1 from IEEE P1363.
   */
-int    BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx)
+int    BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx)
         {
         int ret = 0;
         BIGNUM *u;
-       
+
+       bn_check_top(a);
+
+       if (!p[0])
+               {
+               /* reduction mod 1 => result is 0 */
+               BN_zero(r);
+               return 1;
+               }
+
         BN_CTX_start(ctx);
         if ((u = BN_CTX_get(ctx)) == NULL) goto err;
         
-       if (!BN_zero(u)) goto err;
         if (!BN_set_bit(u, p[0] - 1)) goto err;
         ret = BN_GF2m_mod_exp_arr(r, a, u, p, ctx);
         bn_check_top(r);
  
-  err:
+err:
         BN_CTX_end(ctx);
         return ret;
         }
@@ -847,17 +907,21 @@ int       BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_
   */
  int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
         {
-       const int max = BN_num_bits(p);
-       unsigned int *arr=NULL, ret = 0;
-       if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
-       if (BN_GF2m_poly2arr(p, arr, max) > max)
+       int ret = 0;
+       const int max = BN_num_bits(p) + 1;
+       int *arr=NULL;
+       bn_check_top(a);
+       bn_check_top(p);
+       if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
+       ret = BN_GF2m_poly2arr(p, arr, max);
+       if (!ret || ret > max)
                 {
-               BNerr(BN_F_BN_GF2M_MOD_EXP,BN_R_INVALID_LENGTH);
+               BNerr(BN_F_BN_GF2M_MOD_SQRT,BN_R_INVALID_LENGTH);
                 goto err;
                 }
         ret = BN_GF2m_mod_sqrt_arr(r, a, arr, ctx);
         bn_check_top(r);
-  err:
+err:
         if (arr) OPENSSL_free(arr);
         return ret;
         }
@@ -865,12 +929,20 @@ int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
  /* Find r such that r^2 + r = a mod p.  r could be a. If no r exists returns 0.
   * Uses algorithms A.4.7 and A.4.6 from IEEE P1363.
   */
-int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const unsigned int p[], BN_CTX *ctx)
+int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const int p[], BN_CTX *ctx)
         {
-       int ret = 0, count = 0;
-       unsigned int j;
+       int ret = 0, count = 0, j;
         BIGNUM *a, *z, *rho, *w, *w2, *tmp;
-       
+
+       bn_check_top(a_);
+
+       if (!p[0])
+               {
+               /* reduction mod 1 => result is 0 */
+               BN_zero(r);
+               return 1;
+               }
+
         BN_CTX_start(ctx);
         a = BN_CTX_get(ctx);
         z = BN_CTX_get(ctx);
@@ -881,7 +953,8 @@ int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const unsigned int p
         
         if (BN_is_zero(a))
                 {
-               ret = BN_zero(r);
+               BN_zero(r);
+               ret = 1;
                 goto err;
                 }
  
@@ -907,7 +980,7 @@ int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const unsigned int p
                         {
                         if (!BN_rand(rho, p[0], 0, 0)) goto err;
                         if (!BN_GF2m_mod_arr(rho, rho, p)) goto err;
-                       if (!BN_zero(z)) goto err;
+                       BN_zero(z);
                         if (!BN_copy(w, rho)) goto err;
                         for (j = 1; j <= p[0] - 1; j++)
                                 {
@@ -928,14 +1001,18 @@ int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const unsigned int p
         
         if (!BN_GF2m_mod_sqr_arr(w, z, p, ctx)) goto err;
         if (!BN_GF2m_add(w, z, w)) goto err;
-       if (BN_GF2m_cmp(w, a)) goto err;
+       if (BN_GF2m_cmp(w, a))
+               {
+               BNerr(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR, BN_R_NO_SOLUTION);
+               goto err;
+               }
  
         if (!BN_copy(r, z)) goto err;
         bn_check_top(r);
  
         ret = 1;
  
-  err:
+err:
         BN_CTX_end(ctx);
         return ret;
         }
@@ -948,36 +1025,45 @@ int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const unsigned int p
   */
  int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
         {
-       const int max = BN_num_bits(p);
-       unsigned int *arr=NULL, ret = 0;
-       if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
-       if (BN_GF2m_poly2arr(p, arr, max) > max)
+       int ret = 0;
+       const int max = BN_num_bits(p) + 1;
+       int *arr=NULL;
+       bn_check_top(a);
+       bn_check_top(p);
+       if ((arr = (int *)OPENSSL_malloc(sizeof(int) *
+                                               max)) == NULL) goto err;
+       ret = BN_GF2m_poly2arr(p, arr, max);
+       if (!ret || ret > max)
                 {
                 BNerr(BN_F_BN_GF2M_MOD_SOLVE_QUAD,BN_R_INVALID_LENGTH);
                 goto err;
                 }
         ret = BN_GF2m_mod_solve_quad_arr(r, a, arr, ctx);
         bn_check_top(r);
-  err:
+err:
         if (arr) OPENSSL_free(arr);
         return ret;
         }
  
-/* Convert the bit-string representation of a polynomial a into an array
- * of integers corresponding to the bits with non-zero coefficient.
+/* Convert the bit-string representation of a polynomial
+ * ( \sum_{i=0}^n a_i * x^i) into an array of integers corresponding 
+ * to the bits with non-zero coefficient.  Array is terminated with -1.
   * Up to max elements of the array will be filled.  Return value is total
- * number of coefficients that would be extracted if array was large enough.
+ * number of array elements that would be filled if array was large enough.
   */
-int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max)
+int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max)
         {
-       int i, j, k;
+       int i, j, k = 0;
         BN_ULONG mask;
  
-       for (k = 0; k < max; k++) p[k] = 0;
-       k = 0;
+       if (BN_is_zero(a))
+               return 0;
  
         for (i = a->top - 1; i >= 0; i--)
                 {
+               if (!a->d[i])
+                       /* skip word if a->d[i] == 0 */
+                       continue;
                 mask = BN_TBIT;
                 for (j = BN_BITS2 - 1; j >= 0; j--)
                         {
@@ -990,24 +1076,31 @@ int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max)
                         }
                 }
  
+       if (k < max) {
+               p[k] = -1;
+               k++;
+       }
+
         return k;
         }
  
  /* Convert the coefficient array representation of a polynomial to a 
- * bit-string.  The array must be terminated by 0.
+ * bit-string.  The array must be terminated by -1.
   */
-int BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a)
+int BN_GF2m_arr2poly(const int p[], BIGNUM *a)
         {
         int i;
  
+       bn_check_top(a);
         BN_zero(a);
-       for (i = 0; p[i] > 0; i++)
+       for (i = 0; p[i] != -1; i++)
                 {
-               BN_set_bit(a, p[i]);
+               if (BN_set_bit(a, p[i]) == 0)
+                       return 0;
                 }
-       BN_set_bit(a, 0);
         bn_check_top(a);
-       
+
         return 1;
         }
  
+#endif