crypto/ec/ecp_nistp224.c

   1 /*
   2  * Copyright 2010-2019 The OpenSSL Project Authors. All Rights Reserved.
   3  *
   4  * Licensed under the OpenSSL license (the "License").  You may not use
   5  * this file except in compliance with the License.  You can obtain a copy
   6  * in the file LICENSE in the source distribution or at
   7  * https://www.openssl.org/source/license.html
   8  */
   9
  10 /* Copyright 2011 Google Inc.
  11  *
  12  * Licensed under the Apache License, Version 2.0 (the "License");
  13  *
  14  * you may not use this file except in compliance with the License.
  15  * You may obtain a copy of the License at
  16  *
  17  *     http://www.apache.org/licenses/LICENSE-2.0
  18  *
  19  *  Unless required by applicable law or agreed to in writing, software
  20  *  distributed under the License is distributed on an "AS IS" BASIS,
  21  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  22  *  See the License for the specific language governing permissions and
  23  *  limitations under the License.
  24  */
  25
  26 /*
  27  * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
  28  *
  29  * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
  30  * and Adam Langley's public domain 64-bit C implementation of curve25519
  31  */
  32
  33 #include <openssl/opensslconf.h>
  34 #ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
  35 NON_EMPTY_TRANSLATION_UNIT
  36 #else
  37
  38 # include <stdint.h>
  39 # include <string.h>
  40 # include <openssl/err.h>
  41 # include "ec_lcl.h"
  42
  43 # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
  44   /* even with gcc, the typedef won't work for 32-bit platforms */
  45 typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
  46                                  * platforms */
  47 # else
  48 #  error "Need GCC 3.1 or later to define type uint128_t"
  49 # endif
  50
  51 typedef uint8_t u8;
  52 typedef uint64_t u64;
  53
  54 /******************************************************************************/
  55 /*-
  56  * INTERNAL REPRESENTATION OF FIELD ELEMENTS
  57  *
  58  * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
  59  * using 64-bit coefficients called 'limbs',
  60  * and sometimes (for multiplication results) as
  61  * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
  62  * using 128-bit coefficients called 'widelimbs'.
  63  * A 4-limb representation is an 'felem';
  64  * a 7-widelimb representation is a 'widefelem'.
  65  * Even within felems, bits of adjacent limbs overlap, and we don't always
  66  * reduce the representations: we ensure that inputs to each felem
  67  * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
  68  * and fit into a 128-bit word without overflow. The coefficients are then
  69  * again partially reduced to obtain an felem satisfying a_i < 2^57.
  70  * We only reduce to the unique minimal representation at the end of the
  71  * computation.
  72  */
  73
  74 typedef uint64_t limb;
  75 typedef uint128_t widelimb;
  76
  77 typedef limb felem[4];
  78 typedef widelimb widefelem[7];
  79
  80 /*
  81  * Field element represented as a byte arrary. 28*8 = 224 bits is also the
  82  * group order size for the elliptic curve, and we also use this type for
  83  * scalars for point multiplication.
  84  */
  85 typedef u8 felem_bytearray[28];
  86
  87 static const felem_bytearray nistp224_curve_params[5] = {
  88     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
  89      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
  90      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
  91     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
  92      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
  93      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
  94     {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
  95      0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
  96      0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
  97     {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
  98      0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
  99      0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
 100     {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
 101      0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
 102      0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
 103 };
 104
 105 /*-
 106  * Precomputed multiples of the standard generator
 107  * Points are given in coordinates (X, Y, Z) where Z normally is 1
 108  * (0 for the point at infinity).
 109  * For each field element, slice a_0 is word 0, etc.
 110  *
 111  * The table has 2 * 16 elements, starting with the following:
 112  * index | bits    | point
 113  * ------+---------+------------------------------
 114  *     0 | 0 0 0 0 | 0G
 115  *     1 | 0 0 0 1 | 1G
 116  *     2 | 0 0 1 0 | 2^56G
 117  *     3 | 0 0 1 1 | (2^56 + 1)G
 118  *     4 | 0 1 0 0 | 2^112G
 119  *     5 | 0 1 0 1 | (2^112 + 1)G
 120  *     6 | 0 1 1 0 | (2^112 + 2^56)G
 121  *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
 122  *     8 | 1 0 0 0 | 2^168G
 123  *     9 | 1 0 0 1 | (2^168 + 1)G
 124  *    10 | 1 0 1 0 | (2^168 + 2^56)G
 125  *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
 126  *    12 | 1 1 0 0 | (2^168 + 2^112)G
 127  *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
 128  *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
 129  *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
 130  * followed by a copy of this with each element multiplied by 2^28.
 131  *
 132  * The reason for this is so that we can clock bits into four different
 133  * locations when doing simple scalar multiplies against the base point,
 134  * and then another four locations using the second 16 elements.
 135  */
 136 static const felem gmul[2][16][3] = {
 137 {{{0, 0, 0, 0},
 138   {0, 0, 0, 0},
 139   {0, 0, 0, 0}},
 140  {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
 141   {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
 142   {1, 0, 0, 0}},
 143  {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
 144   {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
 145   {1, 0, 0, 0}},
 146  {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
 147   {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
 148   {1, 0, 0, 0}},
 149  {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
 150   {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
 151   {1, 0, 0, 0}},
 152  {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
 153   {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
 154   {1, 0, 0, 0}},
 155  {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
 156   {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
 157   {1, 0, 0, 0}},
 158  {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
 159   {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
 160   {1, 0, 0, 0}},
 161  {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
 162   {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
 163   {1, 0, 0, 0}},
 164  {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
 165   {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
 166   {1, 0, 0, 0}},
 167  {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
 168   {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
 169   {1, 0, 0, 0}},
 170  {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
 171   {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
 172   {1, 0, 0, 0}},
 173  {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
 174   {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
 175   {1, 0, 0, 0}},
 176  {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
 177   {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
 178   {1, 0, 0, 0}},
 179  {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
 180   {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
 181   {1, 0, 0, 0}},
 182  {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
 183   {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
 184   {1, 0, 0, 0}}},
 185 {{{0, 0, 0, 0},
 186   {0, 0, 0, 0},
 187   {0, 0, 0, 0}},
 188  {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
 189   {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
 190   {1, 0, 0, 0}},
 191  {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
 192   {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
 193   {1, 0, 0, 0}},
 194  {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
 195   {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
 196   {1, 0, 0, 0}},
 197  {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
 198   {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
 199   {1, 0, 0, 0}},
 200  {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
 201   {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
 202   {1, 0, 0, 0}},
 203  {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
 204   {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
 205   {1, 0, 0, 0}},
 206  {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
 207   {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
 208   {1, 0, 0, 0}},
 209  {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
 210   {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
 211   {1, 0, 0, 0}},
 212  {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
 213   {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
 214   {1, 0, 0, 0}},
 215  {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
 216   {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
 217   {1, 0, 0, 0}},
 218  {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
 219   {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
 220   {1, 0, 0, 0}},
 221  {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
 222   {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
 223   {1, 0, 0, 0}},
 224  {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
 225   {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
 226   {1, 0, 0, 0}},
 227  {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
 228   {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
 229   {1, 0, 0, 0}},
 230  {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
 231   {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
 232   {1, 0, 0, 0}}}
 233 };
 234
 235 /* Precomputation for the group generator. */
 236 struct nistp224_pre_comp_st {
 237     felem g_pre_comp[2][16][3];
 238     int references;
 239     CRYPTO_RWLOCK *lock;
 240 };
 241
 242 const EC_METHOD *EC_GFp_nistp224_method(void)
 243 {
 244     static const EC_METHOD ret = {
 245         EC_FLAGS_DEFAULT_OCT,
 246         NID_X9_62_prime_field,
 247         ec_GFp_nistp224_group_init,
 248         ec_GFp_simple_group_finish,
 249         ec_GFp_simple_group_clear_finish,
 250         ec_GFp_nist_group_copy,
 251         ec_GFp_nistp224_group_set_curve,
 252         ec_GFp_simple_group_get_curve,
 253         ec_GFp_simple_group_get_degree,
 254         ec_group_simple_order_bits,
 255         ec_GFp_simple_group_check_discriminant,
 256         ec_GFp_simple_point_init,
 257         ec_GFp_simple_point_finish,
 258         ec_GFp_simple_point_clear_finish,
 259         ec_GFp_simple_point_copy,
 260         ec_GFp_simple_point_set_to_infinity,
 261         ec_GFp_simple_set_Jprojective_coordinates_GFp,
 262         ec_GFp_simple_get_Jprojective_coordinates_GFp,
 263         ec_GFp_simple_point_set_affine_coordinates,
 264         ec_GFp_nistp224_point_get_affine_coordinates,
 265         0 /* point_set_compressed_coordinates */ ,
 266         0 /* point2oct */ ,
 267         0 /* oct2point */ ,
 268         ec_GFp_simple_add,
 269         ec_GFp_simple_dbl,
 270         ec_GFp_simple_invert,
 271         ec_GFp_simple_is_at_infinity,
 272         ec_GFp_simple_is_on_curve,
 273         ec_GFp_simple_cmp,
 274         ec_GFp_simple_make_affine,
 275         ec_GFp_simple_points_make_affine,
 276         ec_GFp_nistp224_points_mul,
 277         ec_GFp_nistp224_precompute_mult,
 278         ec_GFp_nistp224_have_precompute_mult,
 279         ec_GFp_nist_field_mul,
 280         ec_GFp_nist_field_sqr,
 281         0 /* field_div */ ,
 282         ec_GFp_simple_field_inv,
 283         0 /* field_encode */ ,
 284         0 /* field_decode */ ,
 285         0,                      /* field_set_to_one */
 286         ec_key_simple_priv2oct,
 287         ec_key_simple_oct2priv,
 288         0, /* set private */
 289         ec_key_simple_generate_key,
 290         ec_key_simple_check_key,
 291         ec_key_simple_generate_public_key,
 292         0, /* keycopy */
 293         0, /* keyfinish */
 294         ecdh_simple_compute_key,
 295         0  /* blind_coordinates */
 296     };
 297
 298     return &ret;
 299 }
 300
 301 /*
 302  * Helper functions to convert field elements to/from internal representation
 303  */
 304 static void bin28_to_felem(felem out, const u8 in[28])
 305 {
 306     out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
 307     out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
 308     out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
 309     out[3] = (*((const uint64_t *)(in+20))) >> 8;
 310 }
 311
 312 static void felem_to_bin28(u8 out[28], const felem in)
 313 {
 314     unsigned i;
 315     for (i = 0; i < 7; ++i) {
 316         out[i] = in[0] >> (8 * i);
 317         out[i + 7] = in[1] >> (8 * i);
 318         out[i + 14] = in[2] >> (8 * i);
 319         out[i + 21] = in[3] >> (8 * i);
 320     }
 321 }
 322
 323 /* From OpenSSL BIGNUM to internal representation */
 324 static int BN_to_felem(felem out, const BIGNUM *bn)
 325 {
 326     felem_bytearray b_out;
 327     int num_bytes;
 328
 329     if (BN_is_negative(bn)) {
 330         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
 331         return 0;
 332     }
 333     num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
 334     if (num_bytes < 0) {
 335         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
 336         return 0;
 337     }
 338     bin28_to_felem(out, b_out);
 339     return 1;
 340 }
 341
 342 /* From internal representation to OpenSSL BIGNUM */
 343 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
 344 {
 345     felem_bytearray b_out;
 346     felem_to_bin28(b_out, in);
 347     return BN_lebin2bn(b_out, sizeof(b_out), out);
 348 }
 349
 350 /******************************************************************************/
 351 /*-
 352  *                              FIELD OPERATIONS
 353  *
 354  * Field operations, using the internal representation of field elements.
 355  * NB! These operations are specific to our point multiplication and cannot be
 356  * expected to be correct in general - e.g., multiplication with a large scalar
 357  * will cause an overflow.
 358  *
 359  */
 360
 361 static void felem_one(felem out)
 362 {
 363     out[0] = 1;
 364     out[1] = 0;
 365     out[2] = 0;
 366     out[3] = 0;
 367 }
 368
 369 static void felem_assign(felem out, const felem in)
 370 {
 371     out[0] = in[0];
 372     out[1] = in[1];
 373     out[2] = in[2];
 374     out[3] = in[3];
 375 }
 376
 377 /* Sum two field elements: out += in */
 378 static void felem_sum(felem out, const felem in)
 379 {
 380     out[0] += in[0];
 381     out[1] += in[1];
 382     out[2] += in[2];
 383     out[3] += in[3];
 384 }
 385
 386 /* Get negative value: out = -in */
 387 /* Assumes in[i] < 2^57 */
 388 static void felem_neg(felem out, const felem in)
 389 {
 390     static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
 391     static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
 392     static const limb two58m42m2 = (((limb) 1) << 58) -
 393         (((limb) 1) << 42) - (((limb) 1) << 2);
 394
 395     /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
 396     out[0] = two58p2 - in[0];
 397     out[1] = two58m42m2 - in[1];
 398     out[2] = two58m2 - in[2];
 399     out[3] = two58m2 - in[3];
 400 }
 401
 402 /* Subtract field elements: out -= in */
 403 /* Assumes in[i] < 2^57 */
 404 static void felem_diff(felem out, const felem in)
 405 {
 406     static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
 407     static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
 408     static const limb two58m42m2 = (((limb) 1) << 58) -
 409         (((limb) 1) << 42) - (((limb) 1) << 2);
 410
 411     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 412     out[0] += two58p2;
 413     out[1] += two58m42m2;
 414     out[2] += two58m2;
 415     out[3] += two58m2;
 416
 417     out[0] -= in[0];
 418     out[1] -= in[1];
 419     out[2] -= in[2];
 420     out[3] -= in[3];
 421 }
 422
 423 /* Subtract in unreduced 128-bit mode: out -= in */
 424 /* Assumes in[i] < 2^119 */
 425 static void widefelem_diff(widefelem out, const widefelem in)
 426 {
 427     static const widelimb two120 = ((widelimb) 1) << 120;
 428     static const widelimb two120m64 = (((widelimb) 1) << 120) -
 429         (((widelimb) 1) << 64);
 430     static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
 431         (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
 432
 433     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 434     out[0] += two120;
 435     out[1] += two120m64;
 436     out[2] += two120m64;
 437     out[3] += two120;
 438     out[4] += two120m104m64;
 439     out[5] += two120m64;
 440     out[6] += two120m64;
 441
 442     out[0] -= in[0];
 443     out[1] -= in[1];
 444     out[2] -= in[2];
 445     out[3] -= in[3];
 446     out[4] -= in[4];
 447     out[5] -= in[5];
 448     out[6] -= in[6];
 449 }
 450
 451 /* Subtract in mixed mode: out128 -= in64 */
 452 /* in[i] < 2^63 */
 453 static void felem_diff_128_64(widefelem out, const felem in)
 454 {
 455     static const widelimb two64p8 = (((widelimb) 1) << 64) +
 456         (((widelimb) 1) << 8);
 457     static const widelimb two64m8 = (((widelimb) 1) << 64) -
 458         (((widelimb) 1) << 8);
 459     static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
 460         (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
 461
 462     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 463     out[0] += two64p8;
 464     out[1] += two64m48m8;
 465     out[2] += two64m8;
 466     out[3] += two64m8;
 467
 468     out[0] -= in[0];
 469     out[1] -= in[1];
 470     out[2] -= in[2];
 471     out[3] -= in[3];
 472 }
 473
 474 /*
 475  * Multiply a field element by a scalar: out = out * scalar The scalars we
 476  * actually use are small, so results fit without overflow
 477  */
 478 static void felem_scalar(felem out, const limb scalar)
 479 {
 480     out[0] *= scalar;
 481     out[1] *= scalar;
 482     out[2] *= scalar;
 483     out[3] *= scalar;
 484 }
 485
 486 /*
 487  * Multiply an unreduced field element by a scalar: out = out * scalar The
 488  * scalars we actually use are small, so results fit without overflow
 489  */
 490 static void widefelem_scalar(widefelem out, const widelimb scalar)
 491 {
 492     out[0] *= scalar;
 493     out[1] *= scalar;
 494     out[2] *= scalar;
 495     out[3] *= scalar;
 496     out[4] *= scalar;
 497     out[5] *= scalar;
 498     out[6] *= scalar;
 499 }
 500
 501 /* Square a field element: out = in^2 */
 502 static void felem_square(widefelem out, const felem in)
 503 {
 504     limb tmp0, tmp1, tmp2;
 505     tmp0 = 2 * in[0];
 506     tmp1 = 2 * in[1];
 507     tmp2 = 2 * in[2];
 508     out[0] = ((widelimb) in[0]) * in[0];
 509     out[1] = ((widelimb) in[0]) * tmp1;
 510     out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
 511     out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
 512     out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
 513     out[5] = ((widelimb) in[3]) * tmp2;
 514     out[6] = ((widelimb) in[3]) * in[3];
 515 }
 516
 517 /* Multiply two field elements: out = in1 * in2 */
 518 static void felem_mul(widefelem out, const felem in1, const felem in2)
 519 {
 520     out[0] = ((widelimb) in1[0]) * in2[0];
 521     out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
 522     out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
 523              ((widelimb) in1[2]) * in2[0];
 524     out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
 525              ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
 526     out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
 527              ((widelimb) in1[3]) * in2[1];
 528     out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
 529     out[6] = ((widelimb) in1[3]) * in2[3];
 530 }
 531
 532 /*-
 533  * Reduce seven 128-bit coefficients to four 64-bit coefficients.
 534  * Requires in[i] < 2^126,
 535  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
 536 static void felem_reduce(felem out, const widefelem in)
 537 {
 538     static const widelimb two127p15 = (((widelimb) 1) << 127) +
 539         (((widelimb) 1) << 15);
 540     static const widelimb two127m71 = (((widelimb) 1) << 127) -
 541         (((widelimb) 1) << 71);
 542     static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
 543         (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
 544     widelimb output[5];
 545
 546     /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
 547     output[0] = in[0] + two127p15;
 548     output[1] = in[1] + two127m71m55;
 549     output[2] = in[2] + two127m71;
 550     output[3] = in[3];
 551     output[4] = in[4];
 552
 553     /* Eliminate in[4], in[5], in[6] */
 554     output[4] += in[6] >> 16;
 555     output[3] += (in[6] & 0xffff) << 40;
 556     output[2] -= in[6];
 557
 558     output[3] += in[5] >> 16;
 559     output[2] += (in[5] & 0xffff) << 40;
 560     output[1] -= in[5];
 561
 562     output[2] += output[4] >> 16;
 563     output[1] += (output[4] & 0xffff) << 40;
 564     output[0] -= output[4];
 565
 566     /* Carry 2 -> 3 -> 4 */
 567     output[3] += output[2] >> 56;
 568     output[2] &= 0x00ffffffffffffff;
 569
 570     output[4] = output[3] >> 56;
 571     output[3] &= 0x00ffffffffffffff;
 572
 573     /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
 574
 575     /* Eliminate output[4] */
 576     output[2] += output[4] >> 16;
 577     /* output[2] < 2^56 + 2^56 = 2^57 */
 578     output[1] += (output[4] & 0xffff) << 40;
 579     output[0] -= output[4];
 580
 581     /* Carry 0 -> 1 -> 2 -> 3 */
 582     output[1] += output[0] >> 56;
 583     out[0] = output[0] & 0x00ffffffffffffff;
 584
 585     output[2] += output[1] >> 56;
 586     /* output[2] < 2^57 + 2^72 */
 587     out[1] = output[1] & 0x00ffffffffffffff;
 588     output[3] += output[2] >> 56;
 589     /* output[3] <= 2^56 + 2^16 */
 590     out[2] = output[2] & 0x00ffffffffffffff;
 591
 592     /*-
 593      * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
 594      * out[3] <= 2^56 + 2^16 (due to final carry),
 595      * so out < 2*p
 596      */
 597     out[3] = output[3];
 598 }
 599
 600 static void felem_square_reduce(felem out, const felem in)
 601 {
 602     widefelem tmp;
 603     felem_square(tmp, in);
 604     felem_reduce(out, tmp);
 605 }
 606
 607 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
 608 {
 609     widefelem tmp;
 610     felem_mul(tmp, in1, in2);
 611     felem_reduce(out, tmp);
 612 }
 613
 614 /*
 615  * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
 616  * call felem_reduce first)
 617  */
 618 static void felem_contract(felem out, const felem in)
 619 {
 620     static const int64_t two56 = ((limb) 1) << 56;
 621     /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
 622     /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
 623     int64_t tmp[4], a;
 624     tmp[0] = in[0];
 625     tmp[1] = in[1];
 626     tmp[2] = in[2];
 627     tmp[3] = in[3];
 628     /* Case 1: a = 1 iff in >= 2^224 */
 629     a = (in[3] >> 56);
 630     tmp[0] -= a;
 631     tmp[1] += a << 40;
 632     tmp[3] &= 0x00ffffffffffffff;
 633     /*
 634      * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
 635      * and the lower part is non-zero
 636      */
 637     a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
 638         (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
 639     a &= 0x00ffffffffffffff;
 640     /* turn a into an all-one mask (if a = 0) or an all-zero mask */
 641     a = (a - 1) >> 63;
 642     /* subtract 2^224 - 2^96 + 1 if a is all-one */
 643     tmp[3] &= a ^ 0xffffffffffffffff;
 644     tmp[2] &= a ^ 0xffffffffffffffff;
 645     tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
 646     tmp[0] -= 1 & a;
 647
 648     /*
 649      * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
 650      * non-zero, so we only need one step
 651      */
 652     a = tmp[0] >> 63;
 653     tmp[0] += two56 & a;
 654     tmp[1] -= 1 & a;
 655
 656     /* carry 1 -> 2 -> 3 */
 657     tmp[2] += tmp[1] >> 56;
 658     tmp[1] &= 0x00ffffffffffffff;
 659
 660     tmp[3] += tmp[2] >> 56;
 661     tmp[2] &= 0x00ffffffffffffff;
 662
 663     /* Now 0 <= out < p */
 664     out[0] = tmp[0];
 665     out[1] = tmp[1];
 666     out[2] = tmp[2];
 667     out[3] = tmp[3];
 668 }
 669
 670 /*
 671  * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
 672  * elements are reduced to in < 2^225, so we only need to check three cases:
 673  * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
 674  */
 675 static limb felem_is_zero(const felem in)
 676 {
 677     limb zero, two224m96p1, two225m97p2;
 678
 679     zero = in[0] | in[1] | in[2] | in[3];
 680     zero = (((int64_t) (zero) - 1) >> 63) & 1;
 681     two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
 682         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
 683     two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
 684     two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
 685         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
 686     two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
 687     return (zero | two224m96p1 | two225m97p2);
 688 }
 689
 690 static int felem_is_zero_int(const void *in)
 691 {
 692     return (int)(felem_is_zero(in) & ((limb) 1));
 693 }
 694
 695 /* Invert a field element */
 696 /* Computation chain copied from djb's code */
 697 static void felem_inv(felem out, const felem in)
 698 {
 699     felem ftmp, ftmp2, ftmp3, ftmp4;
 700     widefelem tmp;
 701     unsigned i;
 702
 703     felem_square(tmp, in);
 704     felem_reduce(ftmp, tmp);    /* 2 */
 705     felem_mul(tmp, in, ftmp);
 706     felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
 707     felem_square(tmp, ftmp);
 708     felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
 709     felem_mul(tmp, in, ftmp);
 710     felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
 711     felem_square(tmp, ftmp);
 712     felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
 713     felem_square(tmp, ftmp2);
 714     felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
 715     felem_square(tmp, ftmp2);
 716     felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
 717     felem_mul(tmp, ftmp2, ftmp);
 718     felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
 719     felem_square(tmp, ftmp);
 720     felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
 721     for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
 722         felem_square(tmp, ftmp2);
 723         felem_reduce(ftmp2, tmp);
 724     }
 725     felem_mul(tmp, ftmp2, ftmp);
 726     felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
 727     felem_square(tmp, ftmp2);
 728     felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
 729     for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
 730         felem_square(tmp, ftmp3);
 731         felem_reduce(ftmp3, tmp);
 732     }
 733     felem_mul(tmp, ftmp3, ftmp2);
 734     felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
 735     felem_square(tmp, ftmp2);
 736     felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
 737     for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
 738         felem_square(tmp, ftmp3);
 739         felem_reduce(ftmp3, tmp);
 740     }
 741     felem_mul(tmp, ftmp3, ftmp2);
 742     felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
 743     felem_square(tmp, ftmp3);
 744     felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
 745     for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
 746         felem_square(tmp, ftmp4);
 747         felem_reduce(ftmp4, tmp);
 748     }
 749     felem_mul(tmp, ftmp3, ftmp4);
 750     felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
 751     felem_square(tmp, ftmp3);
 752     felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
 753     for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
 754         felem_square(tmp, ftmp4);
 755         felem_reduce(ftmp4, tmp);
 756     }
 757     felem_mul(tmp, ftmp2, ftmp4);
 758     felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
 759     for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
 760         felem_square(tmp, ftmp2);
 761         felem_reduce(ftmp2, tmp);
 762     }
 763     felem_mul(tmp, ftmp2, ftmp);
 764     felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
 765     felem_square(tmp, ftmp);
 766     felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
 767     felem_mul(tmp, ftmp, in);
 768     felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
 769     for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
 770         felem_square(tmp, ftmp);
 771         felem_reduce(ftmp, tmp);
 772     }
 773     felem_mul(tmp, ftmp, ftmp3);
 774     felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
 775 }
 776
 777 /*
 778  * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
 779  * out to itself.
 780  */
 781 static void copy_conditional(felem out, const felem in, limb icopy)
 782 {
 783     unsigned i;
 784     /*
 785      * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
 786      */
 787     const limb copy = -icopy;
 788     for (i = 0; i < 4; ++i) {
 789         const limb tmp = copy & (in[i] ^ out[i]);
 790         out[i] ^= tmp;
 791     }
 792 }
 793
 794 /******************************************************************************/
 795 /*-
 796  *                       ELLIPTIC CURVE POINT OPERATIONS
 797  *
 798  * Points are represented in Jacobian projective coordinates:
 799  * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
 800  * or to the point at infinity if Z == 0.
 801  *
 802  */
 803
 804 /*-
 805  * Double an elliptic curve point:
 806  * (X', Y', Z') = 2 * (X, Y, Z), where
 807  * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
 808  * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
 809  * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
 810  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
 811  * while x_out == y_in is not (maybe this works, but it's not tested).
 812  */
 813 static void
 814 point_double(felem x_out, felem y_out, felem z_out,
 815              const felem x_in, const felem y_in, const felem z_in)
 816 {
 817     widefelem tmp, tmp2;
 818     felem delta, gamma, beta, alpha, ftmp, ftmp2;
 819
 820     felem_assign(ftmp, x_in);
 821     felem_assign(ftmp2, x_in);
 822
 823     /* delta = z^2 */
 824     felem_square(tmp, z_in);
 825     felem_reduce(delta, tmp);
 826
 827     /* gamma = y^2 */
 828     felem_square(tmp, y_in);
 829     felem_reduce(gamma, tmp);
 830
 831     /* beta = x*gamma */
 832     felem_mul(tmp, x_in, gamma);
 833     felem_reduce(beta, tmp);
 834
 835     /* alpha = 3*(x-delta)*(x+delta) */
 836     felem_diff(ftmp, delta);
 837     /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
 838     felem_sum(ftmp2, delta);
 839     /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
 840     felem_scalar(ftmp2, 3);
 841     /* ftmp2[i] < 3 * 2^58 < 2^60 */
 842     felem_mul(tmp, ftmp, ftmp2);
 843     /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
 844     felem_reduce(alpha, tmp);
 845
 846     /* x' = alpha^2 - 8*beta */
 847     felem_square(tmp, alpha);
 848     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 849     felem_assign(ftmp, beta);
 850     felem_scalar(ftmp, 8);
 851     /* ftmp[i] < 8 * 2^57 = 2^60 */
 852     felem_diff_128_64(tmp, ftmp);
 853     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 854     felem_reduce(x_out, tmp);
 855
 856     /* z' = (y + z)^2 - gamma - delta */
 857     felem_sum(delta, gamma);
 858     /* delta[i] < 2^57 + 2^57 = 2^58 */
 859     felem_assign(ftmp, y_in);
 860     felem_sum(ftmp, z_in);
 861     /* ftmp[i] < 2^57 + 2^57 = 2^58 */
 862     felem_square(tmp, ftmp);
 863     /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
 864     felem_diff_128_64(tmp, delta);
 865     /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
 866     felem_reduce(z_out, tmp);
 867
 868     /* y' = alpha*(4*beta - x') - 8*gamma^2 */
 869     felem_scalar(beta, 4);
 870     /* beta[i] < 4 * 2^57 = 2^59 */
 871     felem_diff(beta, x_out);
 872     /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
 873     felem_mul(tmp, alpha, beta);
 874     /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
 875     felem_square(tmp2, gamma);
 876     /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
 877     widefelem_scalar(tmp2, 8);
 878     /* tmp2[i] < 8 * 2^116 = 2^119 */
 879     widefelem_diff(tmp, tmp2);
 880     /* tmp[i] < 2^119 + 2^120 < 2^121 */
 881     felem_reduce(y_out, tmp);
 882 }
 883
 884 /*-
 885  * Add two elliptic curve points:
 886  * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
 887  * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
 888  * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
 889  * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
 890  *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
 891  * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
 892  *
 893  * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
 894  */
 895
 896 /*
 897  * This function is not entirely constant-time: it includes a branch for
 898  * checking whether the two input points are equal, (while not equal to the
 899  * point at infinity). This case never happens during single point
 900  * multiplication, so there is no timing leak for ECDH or ECDSA signing.
 901  */
 902 static void point_add(felem x3, felem y3, felem z3,
 903                       const felem x1, const felem y1, const felem z1,
 904                       const int mixed, const felem x2, const felem y2,
 905                       const felem z2)
 906 {
 907     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
 908     widefelem tmp, tmp2;
 909     limb z1_is_zero, z2_is_zero, x_equal, y_equal;
 910
 911     if (!mixed) {
 912         /* ftmp2 = z2^2 */
 913         felem_square(tmp, z2);
 914         felem_reduce(ftmp2, tmp);
 915
 916         /* ftmp4 = z2^3 */
 917         felem_mul(tmp, ftmp2, z2);
 918         felem_reduce(ftmp4, tmp);
 919
 920         /* ftmp4 = z2^3*y1 */
 921         felem_mul(tmp2, ftmp4, y1);
 922         felem_reduce(ftmp4, tmp2);
 923
 924         /* ftmp2 = z2^2*x1 */
 925         felem_mul(tmp2, ftmp2, x1);
 926         felem_reduce(ftmp2, tmp2);
 927     } else {
 928         /*
 929          * We'll assume z2 = 1 (special case z2 = 0 is handled later)
 930          */
 931
 932         /* ftmp4 = z2^3*y1 */
 933         felem_assign(ftmp4, y1);
 934
 935         /* ftmp2 = z2^2*x1 */
 936         felem_assign(ftmp2, x1);
 937     }
 938
 939     /* ftmp = z1^2 */
 940     felem_square(tmp, z1);
 941     felem_reduce(ftmp, tmp);
 942
 943     /* ftmp3 = z1^3 */
 944     felem_mul(tmp, ftmp, z1);
 945     felem_reduce(ftmp3, tmp);
 946
 947     /* tmp = z1^3*y2 */
 948     felem_mul(tmp, ftmp3, y2);
 949     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 950
 951     /* ftmp3 = z1^3*y2 - z2^3*y1 */
 952     felem_diff_128_64(tmp, ftmp4);
 953     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 954     felem_reduce(ftmp3, tmp);
 955
 956     /* tmp = z1^2*x2 */
 957     felem_mul(tmp, ftmp, x2);
 958     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 959
 960     /* ftmp = z1^2*x2 - z2^2*x1 */
 961     felem_diff_128_64(tmp, ftmp2);
 962     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 963     felem_reduce(ftmp, tmp);
 964
 965     /*
 966      * the formulae are incorrect if the points are equal so we check for
 967      * this and do doubling if this happens
 968      */
 969     x_equal = felem_is_zero(ftmp);
 970     y_equal = felem_is_zero(ftmp3);
 971     z1_is_zero = felem_is_zero(z1);
 972     z2_is_zero = felem_is_zero(z2);
 973     /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
 974     if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
 975         point_double(x3, y3, z3, x1, y1, z1);
 976         return;
 977     }
 978
 979     /* ftmp5 = z1*z2 */
 980     if (!mixed) {
 981         felem_mul(tmp, z1, z2);
 982         felem_reduce(ftmp5, tmp);
 983     } else {
 984         /* special case z2 = 0 is handled later */
 985         felem_assign(ftmp5, z1);
 986     }
 987
 988     /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
 989     felem_mul(tmp, ftmp, ftmp5);
 990     felem_reduce(z_out, tmp);
 991
 992     /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
 993     felem_assign(ftmp5, ftmp);
 994     felem_square(tmp, ftmp);
 995     felem_reduce(ftmp, tmp);
 996
 997     /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
 998     felem_mul(tmp, ftmp, ftmp5);
 999     felem_reduce(ftmp5, tmp);
1000
1001     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1002     felem_mul(tmp, ftmp2, ftmp);
1003     felem_reduce(ftmp2, tmp);
1004
1005     /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1006     felem_mul(tmp, ftmp4, ftmp5);
1007     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1008
1009     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1010     felem_square(tmp2, ftmp3);
1011     /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1012
1013     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1014     felem_diff_128_64(tmp2, ftmp5);
1015     /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1016
1017     /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1018     felem_assign(ftmp5, ftmp2);
1019     felem_scalar(ftmp5, 2);
1020     /* ftmp5[i] < 2 * 2^57 = 2^58 */
1021
1022     /*-
1023      * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1024      *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1025      */
1026     felem_diff_128_64(tmp2, ftmp5);
1027     /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1028     felem_reduce(x_out, tmp2);
1029
1030     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1031     felem_diff(ftmp2, x_out);
1032     /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1033
1034     /*
1035      * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1036      */
1037     felem_mul(tmp2, ftmp3, ftmp2);
1038     /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1039
1040     /*-
1041      * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1042      *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1043      */
1044     widefelem_diff(tmp2, tmp);
1045     /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1046     felem_reduce(y_out, tmp2);
1047
1048     /*
1049      * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1050      * the point at infinity, so we need to check for this separately
1051      */
1052
1053     /*
1054      * if point 1 is at infinity, copy point 2 to output, and vice versa
1055      */
1056     copy_conditional(x_out, x2, z1_is_zero);
1057     copy_conditional(x_out, x1, z2_is_zero);
1058     copy_conditional(y_out, y2, z1_is_zero);
1059     copy_conditional(y_out, y1, z2_is_zero);
1060     copy_conditional(z_out, z2, z1_is_zero);
1061     copy_conditional(z_out, z1, z2_is_zero);
1062     felem_assign(x3, x_out);
1063     felem_assign(y3, y_out);
1064     felem_assign(z3, z_out);
1065 }
1066
1067 /*
1068  * select_point selects the |idx|th point from a precomputation table and
1069  * copies it to out.
1070  * The pre_comp array argument should be size of |size| argument
1071  */
1072 static void select_point(const u64 idx, unsigned int size,
1073                          const felem pre_comp[][3], felem out[3])
1074 {
1075     unsigned i, j;
1076     limb *outlimbs = &out[0][0];
1077
1078     memset(out, 0, sizeof(*out) * 3);
1079     for (i = 0; i < size; i++) {
1080         const limb *inlimbs = &pre_comp[i][0][0];
1081         u64 mask = i ^ idx;
1082         mask |= mask >> 4;
1083         mask |= mask >> 2;
1084         mask |= mask >> 1;
1085         mask &= 1;
1086         mask--;
1087         for (j = 0; j < 4 * 3; j++)
1088             outlimbs[j] |= inlimbs[j] & mask;
1089     }
1090 }
1091
1092 /* get_bit returns the |i|th bit in |in| */
1093 static char get_bit(const felem_bytearray in, unsigned i)
1094 {
1095     if (i >= 224)
1096         return 0;
1097     return (in[i >> 3] >> (i & 7)) & 1;
1098 }
1099
1100 /*
1101  * Interleaved point multiplication using precomputed point multiples: The
1102  * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1103  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1104  * generator, using certain (large) precomputed multiples in g_pre_comp.
1105  * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1106  */
1107 static void batch_mul(felem x_out, felem y_out, felem z_out,
1108                       const felem_bytearray scalars[],
1109                       const unsigned num_points, const u8 *g_scalar,
1110                       const int mixed, const felem pre_comp[][17][3],
1111                       const felem g_pre_comp[2][16][3])
1112 {
1113     int i, skip;
1114     unsigned num;
1115     unsigned gen_mul = (g_scalar != NULL);
1116     felem nq[3], tmp[4];
1117     u64 bits;
1118     u8 sign, digit;
1119
1120     /* set nq to the point at infinity */
1121     memset(nq, 0, sizeof(nq));
1122
1123     /*
1124      * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1125      * of the generator (two in each of the last 28 rounds) and additions of
1126      * other points multiples (every 5th round).
1127      */
1128     skip = 1;                   /* save two point operations in the first
1129                                  * round */
1130     for (i = (num_points ? 220 : 27); i >= 0; --i) {
1131         /* double */
1132         if (!skip)
1133             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1134
1135         /* add multiples of the generator */
1136         if (gen_mul && (i <= 27)) {
1137             /* first, look 28 bits upwards */
1138             bits = get_bit(g_scalar, i + 196) << 3;
1139             bits |= get_bit(g_scalar, i + 140) << 2;
1140             bits |= get_bit(g_scalar, i + 84) << 1;
1141             bits |= get_bit(g_scalar, i + 28);
1142             /* select the point to add, in constant time */
1143             select_point(bits, 16, g_pre_comp[1], tmp);
1144
1145             if (!skip) {
1146                 /* value 1 below is argument for "mixed" */
1147                 point_add(nq[0], nq[1], nq[2],
1148                           nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1149             } else {
1150                 memcpy(nq, tmp, 3 * sizeof(felem));
1151                 skip = 0;
1152             }
1153
1154             /* second, look at the current position */
1155             bits = get_bit(g_scalar, i + 168) << 3;
1156             bits |= get_bit(g_scalar, i + 112) << 2;
1157             bits |= get_bit(g_scalar, i + 56) << 1;
1158             bits |= get_bit(g_scalar, i);
1159             /* select the point to add, in constant time */
1160             select_point(bits, 16, g_pre_comp[0], tmp);
1161             point_add(nq[0], nq[1], nq[2],
1162                       nq[0], nq[1], nq[2],
1163                       1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1164         }
1165
1166         /* do other additions every 5 doublings */
1167         if (num_points && (i % 5 == 0)) {
1168             /* loop over all scalars */
1169             for (num = 0; num < num_points; ++num) {
1170                 bits = get_bit(scalars[num], i + 4) << 5;
1171                 bits |= get_bit(scalars[num], i + 3) << 4;
1172                 bits |= get_bit(scalars[num], i + 2) << 3;
1173                 bits |= get_bit(scalars[num], i + 1) << 2;
1174                 bits |= get_bit(scalars[num], i) << 1;
1175                 bits |= get_bit(scalars[num], i - 1);
1176                 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1177
1178                 /* select the point to add or subtract */
1179                 select_point(digit, 17, pre_comp[num], tmp);
1180                 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1181                                             * point */
1182                 copy_conditional(tmp[1], tmp[3], sign);
1183
1184                 if (!skip) {
1185                     point_add(nq[0], nq[1], nq[2],
1186                               nq[0], nq[1], nq[2],
1187                               mixed, tmp[0], tmp[1], tmp[2]);
1188                 } else {
1189                     memcpy(nq, tmp, 3 * sizeof(felem));
1190                     skip = 0;
1191                 }
1192             }
1193         }
1194     }
1195     felem_assign(x_out, nq[0]);
1196     felem_assign(y_out, nq[1]);
1197     felem_assign(z_out, nq[2]);
1198 }
1199
1200 /******************************************************************************/
1201 /*
1202  * FUNCTIONS TO MANAGE PRECOMPUTATION
1203  */
1204
1205 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1206 {
1207     NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1208
1209     if (!ret) {
1210         ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1211         return ret;
1212     }
1213
1214     ret->references = 1;
1215
1216     ret->lock = CRYPTO_THREAD_lock_new();
1217     if (ret->lock == NULL) {
1218         ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1219         OPENSSL_free(ret);
1220         return NULL;
1221     }
1222     return ret;
1223 }
1224
1225 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1226 {
1227     int i;
1228     if (p != NULL)
1229         CRYPTO_atomic_add(&p->references, 1, &i, p->lock);
1230     return p;
1231 }
1232
1233 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1234 {
1235     int i;
1236
1237     if (p == NULL)
1238         return;
1239
1240     CRYPTO_atomic_add(&p->references, -1, &i, p->lock);
1241     REF_PRINT_COUNT("EC_nistp224", x);
1242     if (i > 0)
1243         return;
1244     REF_ASSERT_ISNT(i < 0);
1245
1246     CRYPTO_THREAD_lock_free(p->lock);
1247     OPENSSL_free(p);
1248 }
1249
1250 /******************************************************************************/
1251 /*
1252  * OPENSSL EC_METHOD FUNCTIONS
1253  */
1254
1255 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1256 {
1257     int ret;
1258     ret = ec_GFp_simple_group_init(group);
1259     group->a_is_minus3 = 1;
1260     return ret;
1261 }
1262
1263 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1264                                     const BIGNUM *a, const BIGNUM *b,
1265                                     BN_CTX *ctx)
1266 {
1267     int ret = 0;
1268     BN_CTX *new_ctx = NULL;
1269     BIGNUM *curve_p, *curve_a, *curve_b;
1270
1271     if (ctx == NULL)
1272         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1273             return 0;
1274     BN_CTX_start(ctx);
1275     if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1276         ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1277         ((curve_b = BN_CTX_get(ctx)) == NULL))
1278         goto err;
1279     BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1280     BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1281     BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1282     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1283         ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1284               EC_R_WRONG_CURVE_PARAMETERS);
1285         goto err;
1286     }
1287     group->field_mod_func = BN_nist_mod_224;
1288     ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1289  err:
1290     BN_CTX_end(ctx);
1291     BN_CTX_free(new_ctx);
1292     return ret;
1293 }
1294
1295 /*
1296  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1297  * (X/Z^2, Y/Z^3)
1298  */
1299 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1300                                                  const EC_POINT *point,
1301                                                  BIGNUM *x, BIGNUM *y,
1302                                                  BN_CTX *ctx)
1303 {
1304     felem z1, z2, x_in, y_in, x_out, y_out;
1305     widefelem tmp;
1306
1307     if (EC_POINT_is_at_infinity(group, point)) {
1308         ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1309               EC_R_POINT_AT_INFINITY);
1310         return 0;
1311     }
1312     if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1313         (!BN_to_felem(z1, point->Z)))
1314         return 0;
1315     felem_inv(z2, z1);
1316     felem_square(tmp, z2);
1317     felem_reduce(z1, tmp);
1318     felem_mul(tmp, x_in, z1);
1319     felem_reduce(x_in, tmp);
1320     felem_contract(x_out, x_in);
1321     if (x != NULL) {
1322         if (!felem_to_BN(x, x_out)) {
1323             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1324                   ERR_R_BN_LIB);
1325             return 0;
1326         }
1327     }
1328     felem_mul(tmp, z1, z2);
1329     felem_reduce(z1, tmp);
1330     felem_mul(tmp, y_in, z1);
1331     felem_reduce(y_in, tmp);
1332     felem_contract(y_out, y_in);
1333     if (y != NULL) {
1334         if (!felem_to_BN(y, y_out)) {
1335             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1336                   ERR_R_BN_LIB);
1337             return 0;
1338         }
1339     }
1340     return 1;
1341 }
1342
1343 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1344                                felem tmp_felems[ /* num+1 */ ])
1345 {
1346     /*
1347      * Runs in constant time, unless an input is the point at infinity (which
1348      * normally shouldn't happen).
1349      */
1350     ec_GFp_nistp_points_make_affine_internal(num,
1351                                              points,
1352                                              sizeof(felem),
1353                                              tmp_felems,
1354                                              (void (*)(void *))felem_one,
1355                                              felem_is_zero_int,
1356                                              (void (*)(void *, const void *))
1357                                              felem_assign,
1358                                              (void (*)(void *, const void *))
1359                                              felem_square_reduce, (void (*)
1360                                                                    (void *,
1361                                                                     const void
1362                                                                     *,
1363                                                                     const void
1364                                                                     *))
1365                                              felem_mul_reduce,
1366                                              (void (*)(void *, const void *))
1367                                              felem_inv,
1368                                              (void (*)(void *, const void *))
1369                                              felem_contract);
1370 }
1371
1372 /*
1373  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1374  * values Result is stored in r (r can equal one of the inputs).
1375  */
1376 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1377                                const BIGNUM *scalar, size_t num,
1378                                const EC_POINT *points[],
1379                                const BIGNUM *scalars[], BN_CTX *ctx)
1380 {
1381     int ret = 0;
1382     int j;
1383     unsigned i;
1384     int mixed = 0;
1385     BN_CTX *new_ctx = NULL;
1386     BIGNUM *x, *y, *z, *tmp_scalar;
1387     felem_bytearray g_secret;
1388     felem_bytearray *secrets = NULL;
1389     felem (*pre_comp)[17][3] = NULL;
1390     felem *tmp_felems = NULL;
1391     int num_bytes;
1392     int have_pre_comp = 0;
1393     size_t num_points = num;
1394     felem x_in, y_in, z_in, x_out, y_out, z_out;
1395     NISTP224_PRE_COMP *pre = NULL;
1396     const felem(*g_pre_comp)[16][3] = NULL;
1397     EC_POINT *generator = NULL;
1398     const EC_POINT *p = NULL;
1399     const BIGNUM *p_scalar = NULL;
1400
1401     if (ctx == NULL)
1402         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1403             return 0;
1404     BN_CTX_start(ctx);
1405     if (((x = BN_CTX_get(ctx)) == NULL) ||
1406         ((y = BN_CTX_get(ctx)) == NULL) ||
1407         ((z = BN_CTX_get(ctx)) == NULL) ||
1408         ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1409         goto err;
1410
1411     if (scalar != NULL) {
1412         pre = group->pre_comp.nistp224;
1413         if (pre)
1414             /* we have precomputation, try to use it */
1415             g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1416         else
1417             /* try to use the standard precomputation */
1418             g_pre_comp = &gmul[0];
1419         generator = EC_POINT_new(group);
1420         if (generator == NULL)
1421             goto err;
1422         /* get the generator from precomputation */
1423         if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1424             !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1425             !felem_to_BN(z, g_pre_comp[0][1][2])) {
1426             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1427             goto err;
1428         }
1429         if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1430                                                       generator, x, y, z,
1431                                                       ctx))
1432             goto err;
1433         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1434             /* precomputation matches generator */
1435             have_pre_comp = 1;
1436         else
1437             /*
1438              * we don't have valid precomputation: treat the generator as a
1439              * random point
1440              */
1441             num_points = num_points + 1;
1442     }
1443
1444     if (num_points > 0) {
1445         if (num_points >= 3) {
1446             /*
1447              * unless we precompute multiples for just one or two points,
1448              * converting those into affine form is time well spent
1449              */
1450             mixed = 1;
1451         }
1452         secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1453         pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1454         if (mixed)
1455             tmp_felems =
1456                 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1457         if ((secrets == NULL) || (pre_comp == NULL)
1458             || (mixed && (tmp_felems == NULL))) {
1459             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1460             goto err;
1461         }
1462
1463         /*
1464          * we treat NULL scalars as 0, and NULL points as points at infinity,
1465          * i.e., they contribute nothing to the linear combination
1466          */
1467         for (i = 0; i < num_points; ++i) {
1468             if (i == num) {
1469                 /* the generator */
1470                 p = EC_GROUP_get0_generator(group);
1471                 p_scalar = scalar;
1472             } else {
1473                 /* the i^th point */
1474                 p = points[i];
1475                 p_scalar = scalars[i];
1476             }
1477             if ((p_scalar != NULL) && (p != NULL)) {
1478                 /* reduce scalar to 0 <= scalar < 2^224 */
1479                 if ((BN_num_bits(p_scalar) > 224)
1480                     || (BN_is_negative(p_scalar))) {
1481                     /*
1482                      * this is an unusual input, and we don't guarantee
1483                      * constant-timeness
1484                      */
1485                     if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1486                         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1487                         goto err;
1488                     }
1489                     num_bytes = BN_bn2lebinpad(tmp_scalar,
1490                                                secrets[i], sizeof(secrets[i]));
1491                 } else {
1492                     num_bytes = BN_bn2lebinpad(p_scalar,
1493                                                secrets[i], sizeof(secrets[i]));
1494                 }
1495                 if (num_bytes < 0) {
1496                     ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1497                     goto err;
1498                 }
1499                 /* precompute multiples */
1500                 if ((!BN_to_felem(x_out, p->X)) ||
1501                     (!BN_to_felem(y_out, p->Y)) ||
1502                     (!BN_to_felem(z_out, p->Z)))
1503                     goto err;
1504                 felem_assign(pre_comp[i][1][0], x_out);
1505                 felem_assign(pre_comp[i][1][1], y_out);
1506                 felem_assign(pre_comp[i][1][2], z_out);
1507                 for (j = 2; j <= 16; ++j) {
1508                     if (j & 1) {
1509                         point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1510                                   pre_comp[i][j][2], pre_comp[i][1][0],
1511                                   pre_comp[i][1][1], pre_comp[i][1][2], 0,
1512                                   pre_comp[i][j - 1][0],
1513                                   pre_comp[i][j - 1][1],
1514                                   pre_comp[i][j - 1][2]);
1515                     } else {
1516                         point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1517                                      pre_comp[i][j][2], pre_comp[i][j / 2][0],
1518                                      pre_comp[i][j / 2][1],
1519                                      pre_comp[i][j / 2][2]);
1520                     }
1521                 }
1522             }
1523         }
1524         if (mixed)
1525             make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1526     }
1527
1528     /* the scalar for the generator */
1529     if ((scalar != NULL) && (have_pre_comp)) {
1530         memset(g_secret, 0, sizeof(g_secret));
1531         /* reduce scalar to 0 <= scalar < 2^224 */
1532         if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1533             /*
1534              * this is an unusual input, and we don't guarantee
1535              * constant-timeness
1536              */
1537             if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1538                 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1539                 goto err;
1540             }
1541             num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1542         } else {
1543             num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1544         }
1545         /* do the multiplication with generator precomputation */
1546         batch_mul(x_out, y_out, z_out,
1547                   (const felem_bytearray(*))secrets, num_points,
1548                   g_secret,
1549                   mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1550     } else {
1551         /* do the multiplication without generator precomputation */
1552         batch_mul(x_out, y_out, z_out,
1553                   (const felem_bytearray(*))secrets, num_points,
1554                   NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1555     }
1556     /* reduce the output to its unique minimal representation */
1557     felem_contract(x_in, x_out);
1558     felem_contract(y_in, y_out);
1559     felem_contract(z_in, z_out);
1560     if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1561         (!felem_to_BN(z, z_in))) {
1562         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1563         goto err;
1564     }
1565     ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1566
1567  err:
1568     BN_CTX_end(ctx);
1569     EC_POINT_free(generator);
1570     BN_CTX_free(new_ctx);
1571     OPENSSL_free(secrets);
1572     OPENSSL_free(pre_comp);
1573     OPENSSL_free(tmp_felems);
1574     return ret;
1575 }
1576
1577 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1578 {
1579     int ret = 0;
1580     NISTP224_PRE_COMP *pre = NULL;
1581     int i, j;
1582     BN_CTX *new_ctx = NULL;
1583     BIGNUM *x, *y;
1584     EC_POINT *generator = NULL;
1585     felem tmp_felems[32];
1586
1587     /* throw away old precomputation */
1588     EC_pre_comp_free(group);
1589     if (ctx == NULL)
1590         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1591             return 0;
1592     BN_CTX_start(ctx);
1593     if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1594         goto err;
1595     /* get the generator */
1596     if (group->generator == NULL)
1597         goto err;
1598     generator = EC_POINT_new(group);
1599     if (generator == NULL)
1600         goto err;
1601     BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1602     BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1603     if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1604         goto err;
1605     if ((pre = nistp224_pre_comp_new()) == NULL)
1606         goto err;
1607     /*
1608      * if the generator is the standard one, use built-in precomputation
1609      */
1610     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1611         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1612         goto done;
1613     }
1614     if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1615         (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1616         (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1617         goto err;
1618     /*
1619      * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1620      * 2^140*G, 2^196*G for the second one
1621      */
1622     for (i = 1; i <= 8; i <<= 1) {
1623         point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1624                      pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1625                      pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1626         for (j = 0; j < 27; ++j) {
1627             point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1628                          pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1629                          pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1630         }
1631         if (i == 8)
1632             break;
1633         point_double(pre->g_pre_comp[0][2 * i][0],
1634                      pre->g_pre_comp[0][2 * i][1],
1635                      pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1636                      pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1637         for (j = 0; j < 27; ++j) {
1638             point_double(pre->g_pre_comp[0][2 * i][0],
1639                          pre->g_pre_comp[0][2 * i][1],
1640                          pre->g_pre_comp[0][2 * i][2],
1641                          pre->g_pre_comp[0][2 * i][0],
1642                          pre->g_pre_comp[0][2 * i][1],
1643                          pre->g_pre_comp[0][2 * i][2]);
1644         }
1645     }
1646     for (i = 0; i < 2; i++) {
1647         /* g_pre_comp[i][0] is the point at infinity */
1648         memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1649         /* the remaining multiples */
1650         /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1651         point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1652                   pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1653                   pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1654                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1655                   pre->g_pre_comp[i][2][2]);
1656         /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1657         point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1658                   pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1659                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1660                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1661                   pre->g_pre_comp[i][2][2]);
1662         /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1663         point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1664                   pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1665                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1666                   0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1667                   pre->g_pre_comp[i][4][2]);
1668         /*
1669          * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1670          */
1671         point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1672                   pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1673                   pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1674                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1675                   pre->g_pre_comp[i][2][2]);
1676         for (j = 1; j < 8; ++j) {
1677             /* odd multiples: add G resp. 2^28*G */
1678             point_add(pre->g_pre_comp[i][2 * j + 1][0],
1679                       pre->g_pre_comp[i][2 * j + 1][1],
1680                       pre->g_pre_comp[i][2 * j + 1][2],
1681                       pre->g_pre_comp[i][2 * j][0],
1682                       pre->g_pre_comp[i][2 * j][1],
1683                       pre->g_pre_comp[i][2 * j][2], 0,
1684                       pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1685                       pre->g_pre_comp[i][1][2]);
1686         }
1687     }
1688     make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1689
1690  done:
1691     SETPRECOMP(group, nistp224, pre);
1692     pre = NULL;
1693     ret = 1;
1694  err:
1695     BN_CTX_end(ctx);
1696     EC_POINT_free(generator);
1697     BN_CTX_free(new_ctx);
1698     EC_nistp224_pre_comp_free(pre);
1699     return ret;
1700 }
1701
1702 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1703 {
1704     return HAVEPRECOMP(group, nistp224);
1705 }
1706
1707 #endif