crypto/bn/bn_gf2m.c

   1 /*
   2  * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved.
   3  *
   4  * Licensed under the OpenSSL license (the "License").  You may not use
   5  * this file except in compliance with the License.  You can obtain a copy
   6  * in the file LICENSE in the source distribution or at
   7  * https://www.openssl.org/source/license.html
   8  */
   9
  10 /* ====================================================================
  11  * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
  12  *
  13  * The Elliptic Curve Public-Key Crypto Library (ECC Code) included
  14  * herein is developed by SUN MICROSYSTEMS, INC., and is contributed
  15  * to the OpenSSL project.
  16  *
  17  * The ECC Code is licensed pursuant to the OpenSSL open source
  18  * license provided below.
  19  */
  20
  21 #include <assert.h>
  22 #include <limits.h>
  23 #include <stdio.h>
  24 #include "internal/cryptlib.h"
  25 #include "bn_lcl.h"
  26
  27 #ifndef OPENSSL_NO_EC2M
  28
  29 /*
  30  * Maximum number of iterations before BN_GF2m_mod_solve_quad_arr should
  31  * fail.
  32  */
  33 # define MAX_ITERATIONS 50
  34
  35 static const BN_ULONG SQR_tb[16] = { 0, 1, 4, 5, 16, 17, 20, 21,
  36     64, 65, 68, 69, 80, 81, 84, 85
  37 };
  38
  39 /* Platform-specific macros to accelerate squaring. */
  40 # if defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
  41 #  define SQR1(w) \
  42     SQR_tb[(w) >> 60 & 0xF] << 56 | SQR_tb[(w) >> 56 & 0xF] << 48 | \
  43     SQR_tb[(w) >> 52 & 0xF] << 40 | SQR_tb[(w) >> 48 & 0xF] << 32 | \
  44     SQR_tb[(w) >> 44 & 0xF] << 24 | SQR_tb[(w) >> 40 & 0xF] << 16 | \
  45     SQR_tb[(w) >> 36 & 0xF] <<  8 | SQR_tb[(w) >> 32 & 0xF]
  46 #  define SQR0(w) \
  47     SQR_tb[(w) >> 28 & 0xF] << 56 | SQR_tb[(w) >> 24 & 0xF] << 48 | \
  48     SQR_tb[(w) >> 20 & 0xF] << 40 | SQR_tb[(w) >> 16 & 0xF] << 32 | \
  49     SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >>  8 & 0xF] << 16 | \
  50     SQR_tb[(w) >>  4 & 0xF] <<  8 | SQR_tb[(w)       & 0xF]
  51 # endif
  52 # ifdef THIRTY_TWO_BIT
  53 #  define SQR1(w) \
  54     SQR_tb[(w) >> 28 & 0xF] << 24 | SQR_tb[(w) >> 24 & 0xF] << 16 | \
  55     SQR_tb[(w) >> 20 & 0xF] <<  8 | SQR_tb[(w) >> 16 & 0xF]
  56 #  define SQR0(w) \
  57     SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >>  8 & 0xF] << 16 | \
  58     SQR_tb[(w) >>  4 & 0xF] <<  8 | SQR_tb[(w)       & 0xF]
  59 # endif
  60
  61 # if !defined(OPENSSL_BN_ASM_GF2m)
  62 /*
  63  * Product of two polynomials a, b each with degree < BN_BITS2 - 1, result is
  64  * a polynomial r with degree < 2 * BN_BITS - 1 The caller MUST ensure that
  65  * the variables have the right amount of space allocated.
  66  */
  67 #  ifdef THIRTY_TWO_BIT
  68 static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a,
  69                             const BN_ULONG b)
  70 {
  71     register BN_ULONG h, l, s;
  72     BN_ULONG tab[8], top2b = a >> 30;
  73     register BN_ULONG a1, a2, a4;
  74
  75     a1 = a & (0x3FFFFFFF);
  76     a2 = a1 << 1;
  77     a4 = a2 << 1;
  78
  79     tab[0] = 0;
  80     tab[1] = a1;
  81     tab[2] = a2;
  82     tab[3] = a1 ^ a2;
  83     tab[4] = a4;
  84     tab[5] = a1 ^ a4;
  85     tab[6] = a2 ^ a4;
  86     tab[7] = a1 ^ a2 ^ a4;
  87
  88     s = tab[b & 0x7];
  89     l = s;
  90     s = tab[b >> 3 & 0x7];
  91     l ^= s << 3;
  92     h = s >> 29;
  93     s = tab[b >> 6 & 0x7];
  94     l ^= s << 6;
  95     h ^= s >> 26;
  96     s = tab[b >> 9 & 0x7];
  97     l ^= s << 9;
  98     h ^= s >> 23;
  99     s = tab[b >> 12 & 0x7];
 100     l ^= s << 12;
 101     h ^= s >> 20;
 102     s = tab[b >> 15 & 0x7];
 103     l ^= s << 15;
 104     h ^= s >> 17;
 105     s = tab[b >> 18 & 0x7];
 106     l ^= s << 18;
 107     h ^= s >> 14;
 108     s = tab[b >> 21 & 0x7];
 109     l ^= s << 21;
 110     h ^= s >> 11;
 111     s = tab[b >> 24 & 0x7];
 112     l ^= s << 24;
 113     h ^= s >> 8;
 114     s = tab[b >> 27 & 0x7];
 115     l ^= s << 27;
 116     h ^= s >> 5;
 117     s = tab[b >> 30];
 118     l ^= s << 30;
 119     h ^= s >> 2;
 120
 121     /* compensate for the top two bits of a */
 122
 123     if (top2b & 01) {
 124         l ^= b << 30;
 125         h ^= b >> 2;
 126     }
 127     if (top2b & 02) {
 128         l ^= b << 31;
 129         h ^= b >> 1;
 130     }
 131
 132     *r1 = h;
 133     *r0 = l;
 134 }
 135 #  endif
 136 #  if defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
 137 static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a,
 138                             const BN_ULONG b)
 139 {
 140     register BN_ULONG h, l, s;
 141     BN_ULONG tab[16], top3b = a >> 61;
 142     register BN_ULONG a1, a2, a4, a8;
 143
 144     a1 = a & (0x1FFFFFFFFFFFFFFFULL);
 145     a2 = a1 << 1;
 146     a4 = a2 << 1;
 147     a8 = a4 << 1;
 148
 149     tab[0] = 0;
 150     tab[1] = a1;
 151     tab[2] = a2;
 152     tab[3] = a1 ^ a2;
 153     tab[4] = a4;
 154     tab[5] = a1 ^ a4;
 155     tab[6] = a2 ^ a4;
 156     tab[7] = a1 ^ a2 ^ a4;
 157     tab[8] = a8;
 158     tab[9] = a1 ^ a8;
 159     tab[10] = a2 ^ a8;
 160     tab[11] = a1 ^ a2 ^ a8;
 161     tab[12] = a4 ^ a8;
 162     tab[13] = a1 ^ a4 ^ a8;
 163     tab[14] = a2 ^ a4 ^ a8;
 164     tab[15] = a1 ^ a2 ^ a4 ^ a8;
 165
 166     s = tab[b & 0xF];
 167     l = s;
 168     s = tab[b >> 4 & 0xF];
 169     l ^= s << 4;
 170     h = s >> 60;
 171     s = tab[b >> 8 & 0xF];
 172     l ^= s << 8;
 173     h ^= s >> 56;
 174     s = tab[b >> 12 & 0xF];
 175     l ^= s << 12;
 176     h ^= s >> 52;
 177     s = tab[b >> 16 & 0xF];
 178     l ^= s << 16;
 179     h ^= s >> 48;
 180     s = tab[b >> 20 & 0xF];
 181     l ^= s << 20;
 182     h ^= s >> 44;
 183     s = tab[b >> 24 & 0xF];
 184     l ^= s << 24;
 185     h ^= s >> 40;
 186     s = tab[b >> 28 & 0xF];
 187     l ^= s << 28;
 188     h ^= s >> 36;
 189     s = tab[b >> 32 & 0xF];
 190     l ^= s << 32;
 191     h ^= s >> 32;
 192     s = tab[b >> 36 & 0xF];
 193     l ^= s << 36;
 194     h ^= s >> 28;
 195     s = tab[b >> 40 & 0xF];
 196     l ^= s << 40;
 197     h ^= s >> 24;
 198     s = tab[b >> 44 & 0xF];
 199     l ^= s << 44;
 200     h ^= s >> 20;
 201     s = tab[b >> 48 & 0xF];
 202     l ^= s << 48;
 203     h ^= s >> 16;
 204     s = tab[b >> 52 & 0xF];
 205     l ^= s << 52;
 206     h ^= s >> 12;
 207     s = tab[b >> 56 & 0xF];
 208     l ^= s << 56;
 209     h ^= s >> 8;
 210     s = tab[b >> 60];
 211     l ^= s << 60;
 212     h ^= s >> 4;
 213
 214     /* compensate for the top three bits of a */
 215
 216     if (top3b & 01) {
 217         l ^= b << 61;
 218         h ^= b >> 3;
 219     }
 220     if (top3b & 02) {
 221         l ^= b << 62;
 222         h ^= b >> 2;
 223     }
 224     if (top3b & 04) {
 225         l ^= b << 63;
 226         h ^= b >> 1;
 227     }
 228
 229     *r1 = h;
 230     *r0 = l;
 231 }
 232 #  endif
 233
 234 /*
 235  * Product of two polynomials a, b each with degree < 2 * BN_BITS2 - 1,
 236  * result is a polynomial r with degree < 4 * BN_BITS2 - 1 The caller MUST
 237  * ensure that the variables have the right amount of space allocated.
 238  */
 239 static void bn_GF2m_mul_2x2(BN_ULONG *r, const BN_ULONG a1, const BN_ULONG a0,
 240                             const BN_ULONG b1, const BN_ULONG b0)
 241 {
 242     BN_ULONG m1, m0;
 243     /* r[3] = h1, r[2] = h0; r[1] = l1; r[0] = l0 */
 244     bn_GF2m_mul_1x1(r + 3, r + 2, a1, b1);
 245     bn_GF2m_mul_1x1(r + 1, r, a0, b0);
 246     bn_GF2m_mul_1x1(&m1, &m0, a0 ^ a1, b0 ^ b1);
 247     /* Correction on m1 ^= l1 ^ h1; m0 ^= l0 ^ h0; */
 248     r[2] ^= m1 ^ r[1] ^ r[3];   /* h0 ^= m1 ^ l1 ^ h1; */
 249     r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */
 250 }
 251 # else
 252 void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1,
 253                      BN_ULONG b0);
 254 # endif
 255
 256 /*
 257  * Add polynomials a and b and store result in r; r could be a or b, a and b
 258  * could be equal; r is the bitwise XOR of a and b.
 259  */
 260 int BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
 261 {
 262     int i;
 263     const BIGNUM *at, *bt;
 264
 265     bn_check_top(a);
 266     bn_check_top(b);
 267
 268     if (a->top < b->top) {
 269         at = b;
 270         bt = a;
 271     } else {
 272         at = a;
 273         bt = b;
 274     }
 275
 276     if (bn_wexpand(r, at->top) == NULL)
 277         return 0;
 278
 279     for (i = 0; i < bt->top; i++) {
 280         r->d[i] = at->d[i] ^ bt->d[i];
 281     }
 282     for (; i < at->top; i++) {
 283         r->d[i] = at->d[i];
 284     }
 285
 286     r->top = at->top;
 287     bn_correct_top(r);
 288
 289     return 1;
 290 }
 291
 292 /*-
 293  * Some functions allow for representation of the irreducible polynomials
 294  * as an int[], say p.  The irreducible f(t) is then of the form:
 295  *     t^p[0] + t^p[1] + ... + t^p[k]
 296  * where m = p[0] > p[1] > ... > p[k] = 0.
 297  */
 298
 299 /* Performs modular reduction of a and store result in r.  r could be a. */
 300 int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[])
 301 {
 302     int j, k;
 303     int n, dN, d0, d1;
 304     BN_ULONG zz, *z;
 305
 306     bn_check_top(a);
 307
 308     if (!p[0]) {
 309         /* reduction mod 1 => return 0 */
 310         BN_zero(r);
 311         return 1;
 312     }
 313
 314     /*
 315      * Since the algorithm does reduction in the r value, if a != r, copy the
 316      * contents of a into r so we can do reduction in r.
 317      */
 318     if (a != r) {
 319         if (!bn_wexpand(r, a->top))
 320             return 0;
 321         for (j = 0; j < a->top; j++) {
 322             r->d[j] = a->d[j];
 323         }
 324         r->top = a->top;
 325     }
 326     z = r->d;
 327
 328     /* start reduction */
 329     dN = p[0] / BN_BITS2;
 330     for (j = r->top - 1; j > dN;) {
 331         zz = z[j];
 332         if (z[j] == 0) {
 333             j--;
 334             continue;
 335         }
 336         z[j] = 0;
 337
 338         for (k = 1; p[k] != 0; k++) {
 339             /* reducing component t^p[k] */
 340             n = p[0] - p[k];
 341             d0 = n % BN_BITS2;
 342             d1 = BN_BITS2 - d0;
 343             n /= BN_BITS2;
 344             z[j - n] ^= (zz >> d0);
 345             if (d0)
 346                 z[j - n - 1] ^= (zz << d1);
 347         }
 348
 349         /* reducing component t^0 */
 350         n = dN;
 351         d0 = p[0] % BN_BITS2;
 352         d1 = BN_BITS2 - d0;
 353         z[j - n] ^= (zz >> d0);
 354         if (d0)
 355             z[j - n - 1] ^= (zz << d1);
 356     }
 357
 358     /* final round of reduction */
 359     while (j == dN) {
 360
 361         d0 = p[0] % BN_BITS2;
 362         zz = z[dN] >> d0;
 363         if (zz == 0)
 364             break;
 365         d1 = BN_BITS2 - d0;
 366
 367         /* clear up the top d1 bits */
 368         if (d0)
 369             z[dN] = (z[dN] << d1) >> d1;
 370         else
 371             z[dN] = 0;
 372         z[0] ^= zz;             /* reduction t^0 component */
 373
 374         for (k = 1; p[k] != 0; k++) {
 375             BN_ULONG tmp_ulong;
 376
 377             /* reducing component t^p[k] */
 378             n = p[k] / BN_BITS2;
 379             d0 = p[k] % BN_BITS2;
 380             d1 = BN_BITS2 - d0;
 381             z[n] ^= (zz << d0);
 382             if (d0 && (tmp_ulong = zz >> d1))
 383                 z[n + 1] ^= tmp_ulong;
 384         }
 385
 386     }
 387
 388     bn_correct_top(r);
 389     return 1;
 390 }
 391
 392 /*
 393  * Performs modular reduction of a by p and store result in r.  r could be a.
 394  * This function calls down to the BN_GF2m_mod_arr implementation; this wrapper
 395  * function is only provided for convenience; for best performance, use the
 396  * BN_GF2m_mod_arr function.
 397  */
 398 int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p)
 399 {
 400     int ret = 0;
 401     int arr[6];
 402     bn_check_top(a);
 403     bn_check_top(p);
 404     ret = BN_GF2m_poly2arr(p, arr, OSSL_NELEM(arr));
 405     if (!ret || ret > (int)OSSL_NELEM(arr)) {
 406         BNerr(BN_F_BN_GF2M_MOD, BN_R_INVALID_LENGTH);
 407         return 0;
 408     }
 409     ret = BN_GF2m_mod_arr(r, a, arr);
 410     bn_check_top(r);
 411     return ret;
 412 }
 413
 414 /*
 415  * Compute the product of two polynomials a and b, reduce modulo p, and store
 416  * the result in r.  r could be a or b; a could be b.
 417  */
 418 int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
 419                         const int p[], BN_CTX *ctx)
 420 {
 421     int zlen, i, j, k, ret = 0;
 422     BIGNUM *s;
 423     BN_ULONG x1, x0, y1, y0, zz[4];
 424
 425     bn_check_top(a);
 426     bn_check_top(b);
 427
 428     if (a == b) {
 429         return BN_GF2m_mod_sqr_arr(r, a, p, ctx);
 430     }
 431
 432     BN_CTX_start(ctx);
 433     if ((s = BN_CTX_get(ctx)) == NULL)
 434         goto err;
 435
 436     zlen = a->top + b->top + 4;
 437     if (!bn_wexpand(s, zlen))
 438         goto err;
 439     s->top = zlen;
 440
 441     for (i = 0; i < zlen; i++)
 442         s->d[i] = 0;
 443
 444     for (j = 0; j < b->top; j += 2) {
 445         y0 = b->d[j];
 446         y1 = ((j + 1) == b->top) ? 0 : b->d[j + 1];
 447         for (i = 0; i < a->top; i += 2) {
 448             x0 = a->d[i];
 449             x1 = ((i + 1) == a->top) ? 0 : a->d[i + 1];
 450             bn_GF2m_mul_2x2(zz, x1, x0, y1, y0);
 451             for (k = 0; k < 4; k++)
 452                 s->d[i + j + k] ^= zz[k];
 453         }
 454     }
 455
 456     bn_correct_top(s);
 457     if (BN_GF2m_mod_arr(r, s, p))
 458         ret = 1;
 459     bn_check_top(r);
 460
 461  err:
 462     BN_CTX_end(ctx);
 463     return ret;
 464 }
 465
 466 /*
 467  * Compute the product of two polynomials a and b, reduce modulo p, and store
 468  * the result in r.  r could be a or b; a could equal b. This function calls
 469  * down to the BN_GF2m_mod_mul_arr implementation; this wrapper function is
 470  * only provided for convenience; for best performance, use the
 471  * BN_GF2m_mod_mul_arr function.
 472  */
 473 int BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
 474                     const BIGNUM *p, BN_CTX *ctx)
 475 {
 476     int ret = 0;
 477     const int max = BN_num_bits(p) + 1;
 478     int *arr = NULL;
 479     bn_check_top(a);
 480     bn_check_top(b);
 481     bn_check_top(p);
 482     if ((arr = OPENSSL_malloc(sizeof(*arr) * max)) == NULL)
 483         goto err;
 484     ret = BN_GF2m_poly2arr(p, arr, max);
 485     if (!ret || ret > max) {
 486         BNerr(BN_F_BN_GF2M_MOD_MUL, BN_R_INVALID_LENGTH);
 487         goto err;
 488     }
 489     ret = BN_GF2m_mod_mul_arr(r, a, b, arr, ctx);
 490     bn_check_top(r);
 491  err:
 492     OPENSSL_free(arr);
 493     return ret;
 494 }
 495
 496 /* Square a, reduce the result mod p, and store it in a.  r could be a. */
 497 int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[],
 498                         BN_CTX *ctx)
 499 {
 500     int i, ret = 0;
 501     BIGNUM *s;
 502
 503     bn_check_top(a);
 504     BN_CTX_start(ctx);
 505     if ((s = BN_CTX_get(ctx)) == NULL)
 506         goto err;
 507     if (!bn_wexpand(s, 2 * a->top))
 508         goto err;
 509
 510     for (i = a->top - 1; i >= 0; i--) {
 511         s->d[2 * i + 1] = SQR1(a->d[i]);
 512         s->d[2 * i] = SQR0(a->d[i]);
 513     }
 514
 515     s->top = 2 * a->top;
 516     bn_correct_top(s);
 517     if (!BN_GF2m_mod_arr(r, s, p))
 518         goto err;
 519     bn_check_top(r);
 520     ret = 1;
 521  err:
 522     BN_CTX_end(ctx);
 523     return ret;
 524 }
 525
 526 /*
 527  * Square a, reduce the result mod p, and store it in a.  r could be a. This
 528  * function calls down to the BN_GF2m_mod_sqr_arr implementation; this
 529  * wrapper function is only provided for convenience; for best performance,
 530  * use the BN_GF2m_mod_sqr_arr function.
 531  */
 532 int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
 533 {
 534     int ret = 0;
 535     const int max = BN_num_bits(p) + 1;
 536     int *arr = NULL;
 537
 538     bn_check_top(a);
 539     bn_check_top(p);
 540     if ((arr = OPENSSL_malloc(sizeof(*arr) * max)) == NULL)
 541         goto err;
 542     ret = BN_GF2m_poly2arr(p, arr, max);
 543     if (!ret || ret > max) {
 544         BNerr(BN_F_BN_GF2M_MOD_SQR, BN_R_INVALID_LENGTH);
 545         goto err;
 546     }
 547     ret = BN_GF2m_mod_sqr_arr(r, a, arr, ctx);
 548     bn_check_top(r);
 549  err:
 550     OPENSSL_free(arr);
 551     return ret;
 552 }
 553
 554 /*
 555  * Invert a, reduce modulo p, and store the result in r. r could be a. Uses
 556  * Modified Almost Inverse Algorithm (Algorithm 10) from Hankerson, D.,
 557  * Hernandez, J.L., and Menezes, A.  "Software Implementation of Elliptic
 558  * Curve Cryptography Over Binary Fields".
 559  */
 560 int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
 561 {
 562     BIGNUM *b, *c = NULL, *u = NULL, *v = NULL, *tmp;
 563     int ret = 0;
 564
 565     bn_check_top(a);
 566     bn_check_top(p);
 567
 568     BN_CTX_start(ctx);
 569
 570     if ((b = BN_CTX_get(ctx)) == NULL)
 571         goto err;
 572     if ((c = BN_CTX_get(ctx)) == NULL)
 573         goto err;
 574     if ((u = BN_CTX_get(ctx)) == NULL)
 575         goto err;
 576     if ((v = BN_CTX_get(ctx)) == NULL)
 577         goto err;
 578
 579     if (!BN_GF2m_mod(u, a, p))
 580         goto err;
 581     if (BN_is_zero(u))
 582         goto err;
 583
 584     if (!BN_copy(v, p))
 585         goto err;
 586 # if 0
 587     if (!BN_one(b))
 588         goto err;
 589
 590     while (1) {
 591         while (!BN_is_odd(u)) {
 592             if (BN_is_zero(u))
 593                 goto err;
 594             if (!BN_rshift1(u, u))
 595                 goto err;
 596             if (BN_is_odd(b)) {
 597                 if (!BN_GF2m_add(b, b, p))
 598                     goto err;
 599             }
 600             if (!BN_rshift1(b, b))
 601                 goto err;
 602         }
 603
 604         if (BN_abs_is_word(u, 1))
 605             break;
 606
 607         if (BN_num_bits(u) < BN_num_bits(v)) {
 608             tmp = u;
 609             u = v;
 610             v = tmp;
 611             tmp = b;
 612             b = c;
 613             c = tmp;
 614         }
 615
 616         if (!BN_GF2m_add(u, u, v))
 617             goto err;
 618         if (!BN_GF2m_add(b, b, c))
 619             goto err;
 620     }
 621 # else
 622     {
 623         int i;
 624         int ubits = BN_num_bits(u);
 625         int vbits = BN_num_bits(v); /* v is copy of p */
 626         int top = p->top;
 627         BN_ULONG *udp, *bdp, *vdp, *cdp;
 628
 629         if (!bn_wexpand(u, top))
 630             goto err;
 631         udp = u->d;
 632         for (i = u->top; i < top; i++)
 633             udp[i] = 0;
 634         u->top = top;
 635         if (!bn_wexpand(b, top))
 636           goto err;
 637         bdp = b->d;
 638         bdp[0] = 1;
 639         for (i = 1; i < top; i++)
 640             bdp[i] = 0;
 641         b->top = top;
 642         if (!bn_wexpand(c, top))
 643           goto err;
 644         cdp = c->d;
 645         for (i = 0; i < top; i++)
 646             cdp[i] = 0;
 647         c->top = top;
 648         vdp = v->d;             /* It pays off to "cache" *->d pointers,
 649                                  * because it allows optimizer to be more
 650                                  * aggressive. But we don't have to "cache"
 651                                  * p->d, because *p is declared 'const'... */
 652         while (1) {
 653             while (ubits && !(udp[0] & 1)) {
 654                 BN_ULONG u0, u1, b0, b1, mask;
 655
 656                 u0 = udp[0];
 657                 b0 = bdp[0];
 658                 mask = (BN_ULONG)0 - (b0 & 1);
 659                 b0 ^= p->d[0] & mask;
 660                 for (i = 0; i < top - 1; i++) {
 661                     u1 = udp[i + 1];
 662                     udp[i] = ((u0 >> 1) | (u1 << (BN_BITS2 - 1))) & BN_MASK2;
 663                     u0 = u1;
 664                     b1 = bdp[i + 1] ^ (p->d[i + 1] & mask);
 665                     bdp[i] = ((b0 >> 1) | (b1 << (BN_BITS2 - 1))) & BN_MASK2;
 666                     b0 = b1;
 667                 }
 668                 udp[i] = u0 >> 1;
 669                 bdp[i] = b0 >> 1;
 670                 ubits--;
 671             }
 672
 673             if (ubits <= BN_BITS2) {
 674                 if (udp[0] == 0) /* poly was reducible */
 675                     goto err;
 676                 if (udp[0] == 1)
 677                     break;
 678             }
 679
 680             if (ubits < vbits) {
 681                 i = ubits;
 682                 ubits = vbits;
 683                 vbits = i;
 684                 tmp = u;
 685                 u = v;
 686                 v = tmp;
 687                 tmp = b;
 688                 b = c;
 689                 c = tmp;
 690                 udp = vdp;
 691                 vdp = v->d;
 692                 bdp = cdp;
 693                 cdp = c->d;
 694             }
 695             for (i = 0; i < top; i++) {
 696                 udp[i] ^= vdp[i];
 697                 bdp[i] ^= cdp[i];
 698             }
 699             if (ubits == vbits) {
 700                 BN_ULONG ul;
 701                 int utop = (ubits - 1) / BN_BITS2;
 702
 703                 while ((ul = udp[utop]) == 0 && utop)
 704                     utop--;
 705                 ubits = utop * BN_BITS2 + BN_num_bits_word(ul);
 706             }
 707         }
 708         bn_correct_top(b);
 709     }
 710 # endif
 711
 712     if (!BN_copy(r, b))
 713         goto err;
 714     bn_check_top(r);
 715     ret = 1;
 716
 717  err:
 718 # ifdef BN_DEBUG                /* BN_CTX_end would complain about the
 719                                  * expanded form */
 720     bn_correct_top(c);
 721     bn_correct_top(u);
 722     bn_correct_top(v);
 723 # endif
 724     BN_CTX_end(ctx);
 725     return ret;
 726 }
 727
 728 /*
 729  * Invert xx, reduce modulo p, and store the result in r. r could be xx.
 730  * This function calls down to the BN_GF2m_mod_inv implementation; this
 731  * wrapper function is only provided for convenience; for best performance,
 732  * use the BN_GF2m_mod_inv function.
 733  */
 734 int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const int p[],
 735                         BN_CTX *ctx)
 736 {
 737     BIGNUM *field;
 738     int ret = 0;
 739
 740     bn_check_top(xx);
 741     BN_CTX_start(ctx);
 742     if ((field = BN_CTX_get(ctx)) == NULL)
 743         goto err;
 744     if (!BN_GF2m_arr2poly(p, field))
 745         goto err;
 746
 747     ret = BN_GF2m_mod_inv(r, xx, field, ctx);
 748     bn_check_top(r);
 749
 750  err:
 751     BN_CTX_end(ctx);
 752     return ret;
 753 }
 754
 755 # ifndef OPENSSL_SUN_GF2M_DIV
 756 /*
 757  * Divide y by x, reduce modulo p, and store the result in r. r could be x
 758  * or y, x could equal y.
 759  */
 760 int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x,
 761                     const BIGNUM *p, BN_CTX *ctx)
 762 {
 763     BIGNUM *xinv = NULL;
 764     int ret = 0;
 765
 766     bn_check_top(y);
 767     bn_check_top(x);
 768     bn_check_top(p);
 769
 770     BN_CTX_start(ctx);
 771     xinv = BN_CTX_get(ctx);
 772     if (xinv == NULL)
 773         goto err;
 774
 775     if (!BN_GF2m_mod_inv(xinv, x, p, ctx))
 776         goto err;
 777     if (!BN_GF2m_mod_mul(r, y, xinv, p, ctx))
 778         goto err;
 779     bn_check_top(r);
 780     ret = 1;
 781
 782  err:
 783     BN_CTX_end(ctx);
 784     return ret;
 785 }
 786 # else
 787 /*
 788  * Divide y by x, reduce modulo p, and store the result in r. r could be x
 789  * or y, x could equal y. Uses algorithm Modular_Division_GF(2^m) from
 790  * Chang-Shantz, S.  "From Euclid's GCD to Montgomery Multiplication to the
 791  * Great Divide".
 792  */
 793 int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x,
 794                     const BIGNUM *p, BN_CTX *ctx)
 795 {
 796     BIGNUM *a, *b, *u, *v;
 797     int ret = 0;
 798
 799     bn_check_top(y);
 800     bn_check_top(x);
 801     bn_check_top(p);
 802
 803     BN_CTX_start(ctx);
 804
 805     a = BN_CTX_get(ctx);
 806     b = BN_CTX_get(ctx);
 807     u = BN_CTX_get(ctx);
 808     v = BN_CTX_get(ctx);
 809     if (v == NULL)
 810         goto err;
 811
 812     /* reduce x and y mod p */
 813     if (!BN_GF2m_mod(u, y, p))
 814         goto err;
 815     if (!BN_GF2m_mod(a, x, p))
 816         goto err;
 817     if (!BN_copy(b, p))
 818         goto err;
 819
 820     while (!BN_is_odd(a)) {
 821         if (!BN_rshift1(a, a))
 822             goto err;
 823         if (BN_is_odd(u))
 824             if (!BN_GF2m_add(u, u, p))
 825                 goto err;
 826         if (!BN_rshift1(u, u))
 827             goto err;
 828     }
 829
 830     do {
 831         if (BN_GF2m_cmp(b, a) > 0) {
 832             if (!BN_GF2m_add(b, b, a))
 833                 goto err;
 834             if (!BN_GF2m_add(v, v, u))
 835                 goto err;
 836             do {
 837                 if (!BN_rshift1(b, b))
 838                     goto err;
 839                 if (BN_is_odd(v))
 840                     if (!BN_GF2m_add(v, v, p))
 841                         goto err;
 842                 if (!BN_rshift1(v, v))
 843                     goto err;
 844             } while (!BN_is_odd(b));
 845         } else if (BN_abs_is_word(a, 1))
 846             break;
 847         else {
 848             if (!BN_GF2m_add(a, a, b))
 849                 goto err;
 850             if (!BN_GF2m_add(u, u, v))
 851                 goto err;
 852             do {
 853                 if (!BN_rshift1(a, a))
 854                     goto err;
 855                 if (BN_is_odd(u))
 856                     if (!BN_GF2m_add(u, u, p))
 857                         goto err;
 858                 if (!BN_rshift1(u, u))
 859                     goto err;
 860             } while (!BN_is_odd(a));
 861         }
 862     } while (1);
 863
 864     if (!BN_copy(r, u))
 865         goto err;
 866     bn_check_top(r);
 867     ret = 1;
 868
 869  err:
 870     BN_CTX_end(ctx);
 871     return ret;
 872 }
 873 # endif
 874
 875 /*
 876  * Divide yy by xx, reduce modulo p, and store the result in r. r could be xx
 877  * * or yy, xx could equal yy. This function calls down to the
 878  * BN_GF2m_mod_div implementation; this wrapper function is only provided for
 879  * convenience; for best performance, use the BN_GF2m_mod_div function.
 880  */
 881 int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx,
 882                         const int p[], BN_CTX *ctx)
 883 {
 884     BIGNUM *field;
 885     int ret = 0;
 886
 887     bn_check_top(yy);
 888     bn_check_top(xx);
 889
 890     BN_CTX_start(ctx);
 891     if ((field = BN_CTX_get(ctx)) == NULL)
 892         goto err;
 893     if (!BN_GF2m_arr2poly(p, field))
 894         goto err;
 895
 896     ret = BN_GF2m_mod_div(r, yy, xx, field, ctx);
 897     bn_check_top(r);
 898
 899  err:
 900     BN_CTX_end(ctx);
 901     return ret;
 902 }
 903
 904 /*
 905  * Compute the bth power of a, reduce modulo p, and store the result in r.  r
 906  * could be a. Uses simple square-and-multiply algorithm A.5.1 from IEEE
 907  * P1363.
 908  */
 909 int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
 910                         const int p[], BN_CTX *ctx)
 911 {
 912     int ret = 0, i, n;
 913     BIGNUM *u;
 914
 915     bn_check_top(a);
 916     bn_check_top(b);
 917
 918     if (BN_is_zero(b))
 919         return (BN_one(r));
 920
 921     if (BN_abs_is_word(b, 1))
 922         return (BN_copy(r, a) != NULL);
 923
 924     BN_CTX_start(ctx);
 925     if ((u = BN_CTX_get(ctx)) == NULL)
 926         goto err;
 927
 928     if (!BN_GF2m_mod_arr(u, a, p))
 929         goto err;
 930
 931     n = BN_num_bits(b) - 1;
 932     for (i = n - 1; i >= 0; i--) {
 933         if (!BN_GF2m_mod_sqr_arr(u, u, p, ctx))
 934             goto err;
 935         if (BN_is_bit_set(b, i)) {
 936             if (!BN_GF2m_mod_mul_arr(u, u, a, p, ctx))
 937                 goto err;
 938         }
 939     }
 940     if (!BN_copy(r, u))
 941         goto err;
 942     bn_check_top(r);
 943     ret = 1;
 944  err:
 945     BN_CTX_end(ctx);
 946     return ret;
 947 }
 948
 949 /*
 950  * Compute the bth power of a, reduce modulo p, and store the result in r.  r
 951  * could be a. This function calls down to the BN_GF2m_mod_exp_arr
 952  * implementation; this wrapper function is only provided for convenience;
 953  * for best performance, use the BN_GF2m_mod_exp_arr function.
 954  */
 955 int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
 956                     const BIGNUM *p, BN_CTX *ctx)
 957 {
 958     int ret = 0;
 959     const int max = BN_num_bits(p) + 1;
 960     int *arr = NULL;
 961     bn_check_top(a);
 962     bn_check_top(b);
 963     bn_check_top(p);
 964     if ((arr = OPENSSL_malloc(sizeof(*arr) * max)) == NULL)
 965         goto err;
 966     ret = BN_GF2m_poly2arr(p, arr, max);
 967     if (!ret || ret > max) {
 968         BNerr(BN_F_BN_GF2M_MOD_EXP, BN_R_INVALID_LENGTH);
 969         goto err;
 970     }
 971     ret = BN_GF2m_mod_exp_arr(r, a, b, arr, ctx);
 972     bn_check_top(r);
 973  err:
 974     OPENSSL_free(arr);
 975     return ret;
 976 }
 977
 978 /*
 979  * Compute the square root of a, reduce modulo p, and store the result in r.
 980  * r could be a. Uses exponentiation as in algorithm A.4.1 from IEEE P1363.
 981  */
 982 int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const int p[],
 983                          BN_CTX *ctx)
 984 {
 985     int ret = 0;
 986     BIGNUM *u;
 987
 988     bn_check_top(a);
 989
 990     if (!p[0]) {
 991         /* reduction mod 1 => return 0 */
 992         BN_zero(r);
 993         return 1;
 994     }
 995
 996     BN_CTX_start(ctx);
 997     if ((u = BN_CTX_get(ctx)) == NULL)
 998         goto err;
 999
1000     if (!BN_set_bit(u, p[0] - 1))
1001         goto err;
1002     ret = BN_GF2m_mod_exp_arr(r, a, u, p, ctx);
1003     bn_check_top(r);
1004
1005  err:
1006     BN_CTX_end(ctx);
1007     return ret;
1008 }
1009
1010 /*
1011  * Compute the square root of a, reduce modulo p, and store the result in r.
1012  * r could be a. This function calls down to the BN_GF2m_mod_sqrt_arr
1013  * implementation; this wrapper function is only provided for convenience;
1014  * for best performance, use the BN_GF2m_mod_sqrt_arr function.
1015  */
1016 int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
1017 {
1018     int ret = 0;
1019     const int max = BN_num_bits(p) + 1;
1020     int *arr = NULL;
1021     bn_check_top(a);
1022     bn_check_top(p);
1023     if ((arr = OPENSSL_malloc(sizeof(*arr) * max)) == NULL)
1024         goto err;
1025     ret = BN_GF2m_poly2arr(p, arr, max);
1026     if (!ret || ret > max) {
1027         BNerr(BN_F_BN_GF2M_MOD_SQRT, BN_R_INVALID_LENGTH);
1028         goto err;
1029     }
1030     ret = BN_GF2m_mod_sqrt_arr(r, a, arr, ctx);
1031     bn_check_top(r);
1032  err:
1033     OPENSSL_free(arr);
1034     return ret;
1035 }
1036
1037 /*
1038  * Find r such that r^2 + r = a mod p.  r could be a. If no r exists returns
1039  * 0. Uses algorithms A.4.7 and A.4.6 from IEEE P1363.
1040  */
1041 int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const int p[],
1042                                BN_CTX *ctx)
1043 {
1044     int ret = 0, count = 0, j;
1045     BIGNUM *a, *z, *rho, *w, *w2, *tmp;
1046
1047     bn_check_top(a_);
1048
1049     if (!p[0]) {
1050         /* reduction mod 1 => return 0 */
1051         BN_zero(r);
1052         return 1;
1053     }
1054
1055     BN_CTX_start(ctx);
1056     a = BN_CTX_get(ctx);
1057     z = BN_CTX_get(ctx);
1058     w = BN_CTX_get(ctx);
1059     if (w == NULL)
1060         goto err;
1061
1062     if (!BN_GF2m_mod_arr(a, a_, p))
1063         goto err;
1064
1065     if (BN_is_zero(a)) {
1066         BN_zero(r);
1067         ret = 1;
1068         goto err;
1069     }
1070
1071     if (p[0] & 0x1) {           /* m is odd */
1072         /* compute half-trace of a */
1073         if (!BN_copy(z, a))
1074             goto err;
1075         for (j = 1; j <= (p[0] - 1) / 2; j++) {
1076             if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx))
1077                 goto err;
1078             if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx))
1079                 goto err;
1080             if (!BN_GF2m_add(z, z, a))
1081                 goto err;
1082         }
1083
1084     } else {                    /* m is even */
1085
1086         rho = BN_CTX_get(ctx);
1087         w2 = BN_CTX_get(ctx);
1088         tmp = BN_CTX_get(ctx);
1089         if (tmp == NULL)
1090             goto err;
1091         do {
1092             if (!BN_rand(rho, p[0], BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
1093                 goto err;
1094             if (!BN_GF2m_mod_arr(rho, rho, p))
1095                 goto err;
1096             BN_zero(z);
1097             if (!BN_copy(w, rho))
1098                 goto err;
1099             for (j = 1; j <= p[0] - 1; j++) {
1100                 if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx))
1101                     goto err;
1102                 if (!BN_GF2m_mod_sqr_arr(w2, w, p, ctx))
1103                     goto err;
1104                 if (!BN_GF2m_mod_mul_arr(tmp, w2, a, p, ctx))
1105                     goto err;
1106                 if (!BN_GF2m_add(z, z, tmp))
1107                     goto err;
1108                 if (!BN_GF2m_add(w, w2, rho))
1109                     goto err;
1110             }
1111             count++;
1112         } while (BN_is_zero(w) && (count < MAX_ITERATIONS));
1113         if (BN_is_zero(w)) {
1114             BNerr(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR, BN_R_TOO_MANY_ITERATIONS);
1115             goto err;
1116         }
1117     }
1118
1119     if (!BN_GF2m_mod_sqr_arr(w, z, p, ctx))
1120         goto err;
1121     if (!BN_GF2m_add(w, z, w))
1122         goto err;
1123     if (BN_GF2m_cmp(w, a)) {
1124         BNerr(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR, BN_R_NO_SOLUTION);
1125         goto err;
1126     }
1127
1128     if (!BN_copy(r, z))
1129         goto err;
1130     bn_check_top(r);
1131
1132     ret = 1;
1133
1134  err:
1135     BN_CTX_end(ctx);
1136     return ret;
1137 }
1138
1139 /*
1140  * Find r such that r^2 + r = a mod p.  r could be a. If no r exists returns
1141  * 0. This function calls down to the BN_GF2m_mod_solve_quad_arr
1142  * implementation; this wrapper function is only provided for convenience;
1143  * for best performance, use the BN_GF2m_mod_solve_quad_arr function.
1144  */
1145 int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
1146                            BN_CTX *ctx)
1147 {
1148     int ret = 0;
1149     const int max = BN_num_bits(p) + 1;
1150     int *arr = NULL;
1151     bn_check_top(a);
1152     bn_check_top(p);
1153     if ((arr = OPENSSL_malloc(sizeof(*arr) * max)) == NULL)
1154         goto err;
1155     ret = BN_GF2m_poly2arr(p, arr, max);
1156     if (!ret || ret > max) {
1157         BNerr(BN_F_BN_GF2M_MOD_SOLVE_QUAD, BN_R_INVALID_LENGTH);
1158         goto err;
1159     }
1160     ret = BN_GF2m_mod_solve_quad_arr(r, a, arr, ctx);
1161     bn_check_top(r);
1162  err:
1163     OPENSSL_free(arr);
1164     return ret;
1165 }
1166
1167 /*
1168  * Convert the bit-string representation of a polynomial ( \sum_{i=0}^n a_i *
1169  * x^i) into an array of integers corresponding to the bits with non-zero
1170  * coefficient.  Array is terminated with -1. Up to max elements of the array
1171  * will be filled.  Return value is total number of array elements that would
1172  * be filled if array was large enough.
1173  */
1174 int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max)
1175 {
1176     int i, j, k = 0;
1177     BN_ULONG mask;
1178
1179     if (BN_is_zero(a))
1180         return 0;
1181
1182     for (i = a->top - 1; i >= 0; i--) {
1183         if (!a->d[i])
1184             /* skip word if a->d[i] == 0 */
1185             continue;
1186         mask = BN_TBIT;
1187         for (j = BN_BITS2 - 1; j >= 0; j--) {
1188             if (a->d[i] & mask) {
1189                 if (k < max)
1190                     p[k] = BN_BITS2 * i + j;
1191                 k++;
1192             }
1193             mask >>= 1;
1194         }
1195     }
1196
1197     if (k < max) {
1198         p[k] = -1;
1199         k++;
1200     }
1201
1202     return k;
1203 }
1204
1205 /*
1206  * Convert the coefficient array representation of a polynomial to a
1207  * bit-string.  The array must be terminated by -1.
1208  */
1209 int BN_GF2m_arr2poly(const int p[], BIGNUM *a)
1210 {
1211     int i;
1212
1213     bn_check_top(a);
1214     BN_zero(a);
1215     for (i = 0; p[i] != -1; i++) {
1216         if (BN_set_bit(a, p[i]) == 0)
1217             return 0;
1218     }
1219     bn_check_top(a);
1220
1221     return 1;
1222 }
1223
1224 #endif