crypto/ec/ecp_nistp224.c

   1 /*
   2  * Copyright 2010-2019 The OpenSSL Project Authors. All Rights Reserved.
   3  *
   4  * Licensed under the OpenSSL license (the "License").  You may not use
   5  * this file except in compliance with the License.  You can obtain a copy
   6  * in the file LICENSE in the source distribution or at
   7  * https://www.openssl.org/source/license.html
   8  */
   9
  10 /* Copyright 2011 Google Inc.
  11  *
  12  * Licensed under the Apache License, Version 2.0 (the "License");
  13  *
  14  * you may not use this file except in compliance with the License.
  15  * You may obtain a copy of the License at
  16  *
  17  *     http://www.apache.org/licenses/LICENSE-2.0
  18  *
  19  *  Unless required by applicable law or agreed to in writing, software
  20  *  distributed under the License is distributed on an "AS IS" BASIS,
  21  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  22  *  See the License for the specific language governing permissions and
  23  *  limitations under the License.
  24  */
  25
  26 /*
  27  * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
  28  *
  29  * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
  30  * and Adam Langley's public domain 64-bit C implementation of curve25519
  31  */
  32
  33 #include <openssl/opensslconf.h>
  34 #ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
  35 NON_EMPTY_TRANSLATION_UNIT
  36 #else
  37
  38 # include <stdint.h>
  39 # include <string.h>
  40 # include <openssl/err.h>
  41 # include "ec_lcl.h"
  42
  43 # if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
  44   /* even with gcc, the typedef won't work for 32-bit platforms */
  45 typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
  46                                  * platforms */
  47 # else
  48 #  error "Your compiler doesn't appear to support 128-bit integer types"
  49 # endif
  50
  51 typedef uint8_t u8;
  52 typedef uint64_t u64;
  53
  54 /******************************************************************************/
  55 /*-
  56  * INTERNAL REPRESENTATION OF FIELD ELEMENTS
  57  *
  58  * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
  59  * using 64-bit coefficients called 'limbs',
  60  * and sometimes (for multiplication results) as
  61  * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
  62  * using 128-bit coefficients called 'widelimbs'.
  63  * A 4-limb representation is an 'felem';
  64  * a 7-widelimb representation is a 'widefelem'.
  65  * Even within felems, bits of adjacent limbs overlap, and we don't always
  66  * reduce the representations: we ensure that inputs to each felem
  67  * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
  68  * and fit into a 128-bit word without overflow. The coefficients are then
  69  * again partially reduced to obtain an felem satisfying a_i < 2^57.
  70  * We only reduce to the unique minimal representation at the end of the
  71  * computation.
  72  */
  73
  74 typedef uint64_t limb;
  75 typedef uint128_t widelimb;
  76
  77 typedef limb felem[4];
  78 typedef widelimb widefelem[7];
  79
  80 /*
  81  * Field element represented as a byte array. 28*8 = 224 bits is also the
  82  * group order size for the elliptic curve, and we also use this type for
  83  * scalars for point multiplication.
  84  */
  85 typedef u8 felem_bytearray[28];
  86
  87 static const felem_bytearray nistp224_curve_params[5] = {
  88     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
  89      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
  90      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
  91     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
  92      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
  93      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
  94     {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
  95      0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
  96      0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
  97     {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
  98      0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
  99      0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
 100     {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
 101      0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
 102      0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
 103 };
 104
 105 /*-
 106  * Precomputed multiples of the standard generator
 107  * Points are given in coordinates (X, Y, Z) where Z normally is 1
 108  * (0 for the point at infinity).
 109  * For each field element, slice a_0 is word 0, etc.
 110  *
 111  * The table has 2 * 16 elements, starting with the following:
 112  * index | bits    | point
 113  * ------+---------+------------------------------
 114  *     0 | 0 0 0 0 | 0G
 115  *     1 | 0 0 0 1 | 1G
 116  *     2 | 0 0 1 0 | 2^56G
 117  *     3 | 0 0 1 1 | (2^56 + 1)G
 118  *     4 | 0 1 0 0 | 2^112G
 119  *     5 | 0 1 0 1 | (2^112 + 1)G
 120  *     6 | 0 1 1 0 | (2^112 + 2^56)G
 121  *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
 122  *     8 | 1 0 0 0 | 2^168G
 123  *     9 | 1 0 0 1 | (2^168 + 1)G
 124  *    10 | 1 0 1 0 | (2^168 + 2^56)G
 125  *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
 126  *    12 | 1 1 0 0 | (2^168 + 2^112)G
 127  *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
 128  *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
 129  *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
 130  * followed by a copy of this with each element multiplied by 2^28.
 131  *
 132  * The reason for this is so that we can clock bits into four different
 133  * locations when doing simple scalar multiplies against the base point,
 134  * and then another four locations using the second 16 elements.
 135  */
 136 static const felem gmul[2][16][3] = {
 137 {{{0, 0, 0, 0},
 138   {0, 0, 0, 0},
 139   {0, 0, 0, 0}},
 140  {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
 141   {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
 142   {1, 0, 0, 0}},
 143  {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
 144   {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
 145   {1, 0, 0, 0}},
 146  {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
 147   {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
 148   {1, 0, 0, 0}},
 149  {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
 150   {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
 151   {1, 0, 0, 0}},
 152  {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
 153   {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
 154   {1, 0, 0, 0}},
 155  {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
 156   {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
 157   {1, 0, 0, 0}},
 158  {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
 159   {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
 160   {1, 0, 0, 0}},
 161  {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
 162   {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
 163   {1, 0, 0, 0}},
 164  {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
 165   {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
 166   {1, 0, 0, 0}},
 167  {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
 168   {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
 169   {1, 0, 0, 0}},
 170  {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
 171   {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
 172   {1, 0, 0, 0}},
 173  {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
 174   {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
 175   {1, 0, 0, 0}},
 176  {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
 177   {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
 178   {1, 0, 0, 0}},
 179  {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
 180   {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
 181   {1, 0, 0, 0}},
 182  {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
 183   {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
 184   {1, 0, 0, 0}}},
 185 {{{0, 0, 0, 0},
 186   {0, 0, 0, 0},
 187   {0, 0, 0, 0}},
 188  {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
 189   {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
 190   {1, 0, 0, 0}},
 191  {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
 192   {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
 193   {1, 0, 0, 0}},
 194  {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
 195   {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
 196   {1, 0, 0, 0}},
 197  {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
 198   {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
 199   {1, 0, 0, 0}},
 200  {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
 201   {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
 202   {1, 0, 0, 0}},
 203  {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
 204   {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
 205   {1, 0, 0, 0}},
 206  {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
 207   {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
 208   {1, 0, 0, 0}},
 209  {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
 210   {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
 211   {1, 0, 0, 0}},
 212  {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
 213   {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
 214   {1, 0, 0, 0}},
 215  {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
 216   {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
 217   {1, 0, 0, 0}},
 218  {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
 219   {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
 220   {1, 0, 0, 0}},
 221  {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
 222   {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
 223   {1, 0, 0, 0}},
 224  {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
 225   {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
 226   {1, 0, 0, 0}},
 227  {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
 228   {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
 229   {1, 0, 0, 0}},
 230  {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
 231   {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
 232   {1, 0, 0, 0}}}
 233 };
 234
 235 /* Precomputation for the group generator. */
 236 struct nistp224_pre_comp_st {
 237     felem g_pre_comp[2][16][3];
 238     CRYPTO_REF_COUNT references;
 239     CRYPTO_RWLOCK *lock;
 240 };
 241
 242 const EC_METHOD *EC_GFp_nistp224_method(void)
 243 {
 244     static const EC_METHOD ret = {
 245         EC_FLAGS_DEFAULT_OCT,
 246         NID_X9_62_prime_field,
 247         ec_GFp_nistp224_group_init,
 248         ec_GFp_simple_group_finish,
 249         ec_GFp_simple_group_clear_finish,
 250         ec_GFp_nist_group_copy,
 251         ec_GFp_nistp224_group_set_curve,
 252         ec_GFp_simple_group_get_curve,
 253         ec_GFp_simple_group_get_degree,
 254         ec_group_simple_order_bits,
 255         ec_GFp_simple_group_check_discriminant,
 256         ec_GFp_simple_point_init,
 257         ec_GFp_simple_point_finish,
 258         ec_GFp_simple_point_clear_finish,
 259         ec_GFp_simple_point_copy,
 260         ec_GFp_simple_point_set_to_infinity,
 261         ec_GFp_simple_set_Jprojective_coordinates_GFp,
 262         ec_GFp_simple_get_Jprojective_coordinates_GFp,
 263         ec_GFp_simple_point_set_affine_coordinates,
 264         ec_GFp_nistp224_point_get_affine_coordinates,
 265         0 /* point_set_compressed_coordinates */ ,
 266         0 /* point2oct */ ,
 267         0 /* oct2point */ ,
 268         ec_GFp_simple_add,
 269         ec_GFp_simple_dbl,
 270         ec_GFp_simple_invert,
 271         ec_GFp_simple_is_at_infinity,
 272         ec_GFp_simple_is_on_curve,
 273         ec_GFp_simple_cmp,
 274         ec_GFp_simple_make_affine,
 275         ec_GFp_simple_points_make_affine,
 276         ec_GFp_nistp224_points_mul,
 277         ec_GFp_nistp224_precompute_mult,
 278         ec_GFp_nistp224_have_precompute_mult,
 279         ec_GFp_nist_field_mul,
 280         ec_GFp_nist_field_sqr,
 281         0 /* field_div */ ,
 282         ec_GFp_simple_field_inv,
 283         0 /* field_encode */ ,
 284         0 /* field_decode */ ,
 285         0,                      /* field_set_to_one */
 286         ec_key_simple_priv2oct,
 287         ec_key_simple_oct2priv,
 288         0, /* set private */
 289         ec_key_simple_generate_key,
 290         ec_key_simple_check_key,
 291         ec_key_simple_generate_public_key,
 292         0, /* keycopy */
 293         0, /* keyfinish */
 294         ecdh_simple_compute_key,
 295         0, /* field_inverse_mod_ord */
 296         0, /* blind_coordinates */
 297         0, /* ladder_pre */
 298         0, /* ladder_step */
 299         0  /* ladder_post */
 300     };
 301
 302     return &ret;
 303 }
 304
 305 /*
 306  * Helper functions to convert field elements to/from internal representation
 307  */
 308 static void bin28_to_felem(felem out, const u8 in[28])
 309 {
 310     out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
 311     out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
 312     out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
 313     out[3] = (*((const uint64_t *)(in+20))) >> 8;
 314 }
 315
 316 static void felem_to_bin28(u8 out[28], const felem in)
 317 {
 318     unsigned i;
 319     for (i = 0; i < 7; ++i) {
 320         out[i] = in[0] >> (8 * i);
 321         out[i + 7] = in[1] >> (8 * i);
 322         out[i + 14] = in[2] >> (8 * i);
 323         out[i + 21] = in[3] >> (8 * i);
 324     }
 325 }
 326
 327 /* From OpenSSL BIGNUM to internal representation */
 328 static int BN_to_felem(felem out, const BIGNUM *bn)
 329 {
 330     felem_bytearray b_out;
 331     int num_bytes;
 332
 333     if (BN_is_negative(bn)) {
 334         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
 335         return 0;
 336     }
 337     num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
 338     if (num_bytes < 0) {
 339         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
 340         return 0;
 341     }
 342     bin28_to_felem(out, b_out);
 343     return 1;
 344 }
 345
 346 /* From internal representation to OpenSSL BIGNUM */
 347 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
 348 {
 349     felem_bytearray b_out;
 350     felem_to_bin28(b_out, in);
 351     return BN_lebin2bn(b_out, sizeof(b_out), out);
 352 }
 353
 354 /******************************************************************************/
 355 /*-
 356  *                              FIELD OPERATIONS
 357  *
 358  * Field operations, using the internal representation of field elements.
 359  * NB! These operations are specific to our point multiplication and cannot be
 360  * expected to be correct in general - e.g., multiplication with a large scalar
 361  * will cause an overflow.
 362  *
 363  */
 364
 365 static void felem_one(felem out)
 366 {
 367     out[0] = 1;
 368     out[1] = 0;
 369     out[2] = 0;
 370     out[3] = 0;
 371 }
 372
 373 static void felem_assign(felem out, const felem in)
 374 {
 375     out[0] = in[0];
 376     out[1] = in[1];
 377     out[2] = in[2];
 378     out[3] = in[3];
 379 }
 380
 381 /* Sum two field elements: out += in */
 382 static void felem_sum(felem out, const felem in)
 383 {
 384     out[0] += in[0];
 385     out[1] += in[1];
 386     out[2] += in[2];
 387     out[3] += in[3];
 388 }
 389
 390 /* Subtract field elements: out -= in */
 391 /* Assumes in[i] < 2^57 */
 392 static void felem_diff(felem out, const felem in)
 393 {
 394     static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
 395     static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
 396     static const limb two58m42m2 = (((limb) 1) << 58) -
 397         (((limb) 1) << 42) - (((limb) 1) << 2);
 398
 399     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 400     out[0] += two58p2;
 401     out[1] += two58m42m2;
 402     out[2] += two58m2;
 403     out[3] += two58m2;
 404
 405     out[0] -= in[0];
 406     out[1] -= in[1];
 407     out[2] -= in[2];
 408     out[3] -= in[3];
 409 }
 410
 411 /* Subtract in unreduced 128-bit mode: out -= in */
 412 /* Assumes in[i] < 2^119 */
 413 static void widefelem_diff(widefelem out, const widefelem in)
 414 {
 415     static const widelimb two120 = ((widelimb) 1) << 120;
 416     static const widelimb two120m64 = (((widelimb) 1) << 120) -
 417         (((widelimb) 1) << 64);
 418     static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
 419         (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
 420
 421     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 422     out[0] += two120;
 423     out[1] += two120m64;
 424     out[2] += two120m64;
 425     out[3] += two120;
 426     out[4] += two120m104m64;
 427     out[5] += two120m64;
 428     out[6] += two120m64;
 429
 430     out[0] -= in[0];
 431     out[1] -= in[1];
 432     out[2] -= in[2];
 433     out[3] -= in[3];
 434     out[4] -= in[4];
 435     out[5] -= in[5];
 436     out[6] -= in[6];
 437 }
 438
 439 /* Subtract in mixed mode: out128 -= in64 */
 440 /* in[i] < 2^63 */
 441 static void felem_diff_128_64(widefelem out, const felem in)
 442 {
 443     static const widelimb two64p8 = (((widelimb) 1) << 64) +
 444         (((widelimb) 1) << 8);
 445     static const widelimb two64m8 = (((widelimb) 1) << 64) -
 446         (((widelimb) 1) << 8);
 447     static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
 448         (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
 449
 450     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
 451     out[0] += two64p8;
 452     out[1] += two64m48m8;
 453     out[2] += two64m8;
 454     out[3] += two64m8;
 455
 456     out[0] -= in[0];
 457     out[1] -= in[1];
 458     out[2] -= in[2];
 459     out[3] -= in[3];
 460 }
 461
 462 /*
 463  * Multiply a field element by a scalar: out = out * scalar The scalars we
 464  * actually use are small, so results fit without overflow
 465  */
 466 static void felem_scalar(felem out, const limb scalar)
 467 {
 468     out[0] *= scalar;
 469     out[1] *= scalar;
 470     out[2] *= scalar;
 471     out[3] *= scalar;
 472 }
 473
 474 /*
 475  * Multiply an unreduced field element by a scalar: out = out * scalar The
 476  * scalars we actually use are small, so results fit without overflow
 477  */
 478 static void widefelem_scalar(widefelem out, const widelimb scalar)
 479 {
 480     out[0] *= scalar;
 481     out[1] *= scalar;
 482     out[2] *= scalar;
 483     out[3] *= scalar;
 484     out[4] *= scalar;
 485     out[5] *= scalar;
 486     out[6] *= scalar;
 487 }
 488
 489 /* Square a field element: out = in^2 */
 490 static void felem_square(widefelem out, const felem in)
 491 {
 492     limb tmp0, tmp1, tmp2;
 493     tmp0 = 2 * in[0];
 494     tmp1 = 2 * in[1];
 495     tmp2 = 2 * in[2];
 496     out[0] = ((widelimb) in[0]) * in[0];
 497     out[1] = ((widelimb) in[0]) * tmp1;
 498     out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
 499     out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
 500     out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
 501     out[5] = ((widelimb) in[3]) * tmp2;
 502     out[6] = ((widelimb) in[3]) * in[3];
 503 }
 504
 505 /* Multiply two field elements: out = in1 * in2 */
 506 static void felem_mul(widefelem out, const felem in1, const felem in2)
 507 {
 508     out[0] = ((widelimb) in1[0]) * in2[0];
 509     out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
 510     out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
 511              ((widelimb) in1[2]) * in2[0];
 512     out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
 513              ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
 514     out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
 515              ((widelimb) in1[3]) * in2[1];
 516     out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
 517     out[6] = ((widelimb) in1[3]) * in2[3];
 518 }
 519
 520 /*-
 521  * Reduce seven 128-bit coefficients to four 64-bit coefficients.
 522  * Requires in[i] < 2^126,
 523  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
 524 static void felem_reduce(felem out, const widefelem in)
 525 {
 526     static const widelimb two127p15 = (((widelimb) 1) << 127) +
 527         (((widelimb) 1) << 15);
 528     static const widelimb two127m71 = (((widelimb) 1) << 127) -
 529         (((widelimb) 1) << 71);
 530     static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
 531         (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
 532     widelimb output[5];
 533
 534     /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
 535     output[0] = in[0] + two127p15;
 536     output[1] = in[1] + two127m71m55;
 537     output[2] = in[2] + two127m71;
 538     output[3] = in[3];
 539     output[4] = in[4];
 540
 541     /* Eliminate in[4], in[5], in[6] */
 542     output[4] += in[6] >> 16;
 543     output[3] += (in[6] & 0xffff) << 40;
 544     output[2] -= in[6];
 545
 546     output[3] += in[5] >> 16;
 547     output[2] += (in[5] & 0xffff) << 40;
 548     output[1] -= in[5];
 549
 550     output[2] += output[4] >> 16;
 551     output[1] += (output[4] & 0xffff) << 40;
 552     output[0] -= output[4];
 553
 554     /* Carry 2 -> 3 -> 4 */
 555     output[3] += output[2] >> 56;
 556     output[2] &= 0x00ffffffffffffff;
 557
 558     output[4] = output[3] >> 56;
 559     output[3] &= 0x00ffffffffffffff;
 560
 561     /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
 562
 563     /* Eliminate output[4] */
 564     output[2] += output[4] >> 16;
 565     /* output[2] < 2^56 + 2^56 = 2^57 */
 566     output[1] += (output[4] & 0xffff) << 40;
 567     output[0] -= output[4];
 568
 569     /* Carry 0 -> 1 -> 2 -> 3 */
 570     output[1] += output[0] >> 56;
 571     out[0] = output[0] & 0x00ffffffffffffff;
 572
 573     output[2] += output[1] >> 56;
 574     /* output[2] < 2^57 + 2^72 */
 575     out[1] = output[1] & 0x00ffffffffffffff;
 576     output[3] += output[2] >> 56;
 577     /* output[3] <= 2^56 + 2^16 */
 578     out[2] = output[2] & 0x00ffffffffffffff;
 579
 580     /*-
 581      * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
 582      * out[3] <= 2^56 + 2^16 (due to final carry),
 583      * so out < 2*p
 584      */
 585     out[3] = output[3];
 586 }
 587
 588 static void felem_square_reduce(felem out, const felem in)
 589 {
 590     widefelem tmp;
 591     felem_square(tmp, in);
 592     felem_reduce(out, tmp);
 593 }
 594
 595 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
 596 {
 597     widefelem tmp;
 598     felem_mul(tmp, in1, in2);
 599     felem_reduce(out, tmp);
 600 }
 601
 602 /*
 603  * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
 604  * call felem_reduce first)
 605  */
 606 static void felem_contract(felem out, const felem in)
 607 {
 608     static const int64_t two56 = ((limb) 1) << 56;
 609     /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
 610     /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
 611     int64_t tmp[4], a;
 612     tmp[0] = in[0];
 613     tmp[1] = in[1];
 614     tmp[2] = in[2];
 615     tmp[3] = in[3];
 616     /* Case 1: a = 1 iff in >= 2^224 */
 617     a = (in[3] >> 56);
 618     tmp[0] -= a;
 619     tmp[1] += a << 40;
 620     tmp[3] &= 0x00ffffffffffffff;
 621     /*
 622      * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
 623      * and the lower part is non-zero
 624      */
 625     a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
 626         (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
 627     a &= 0x00ffffffffffffff;
 628     /* turn a into an all-one mask (if a = 0) or an all-zero mask */
 629     a = (a - 1) >> 63;
 630     /* subtract 2^224 - 2^96 + 1 if a is all-one */
 631     tmp[3] &= a ^ 0xffffffffffffffff;
 632     tmp[2] &= a ^ 0xffffffffffffffff;
 633     tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
 634     tmp[0] -= 1 & a;
 635
 636     /*
 637      * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
 638      * non-zero, so we only need one step
 639      */
 640     a = tmp[0] >> 63;
 641     tmp[0] += two56 & a;
 642     tmp[1] -= 1 & a;
 643
 644     /* carry 1 -> 2 -> 3 */
 645     tmp[2] += tmp[1] >> 56;
 646     tmp[1] &= 0x00ffffffffffffff;
 647
 648     tmp[3] += tmp[2] >> 56;
 649     tmp[2] &= 0x00ffffffffffffff;
 650
 651     /* Now 0 <= out < p */
 652     out[0] = tmp[0];
 653     out[1] = tmp[1];
 654     out[2] = tmp[2];
 655     out[3] = tmp[3];
 656 }
 657
 658 /*
 659  * Get negative value: out = -in
 660  * Requires in[i] < 2^63,
 661  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
 662  */
 663 static void felem_neg(felem out, const felem in)
 664 {
 665     widefelem tmp = {0};
 666     felem_diff_128_64(tmp, in);
 667     felem_reduce(out, tmp);
 668 }
 669
 670 /*
 671  * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
 672  * elements are reduced to in < 2^225, so we only need to check three cases:
 673  * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
 674  */
 675 static limb felem_is_zero(const felem in)
 676 {
 677     limb zero, two224m96p1, two225m97p2;
 678
 679     zero = in[0] | in[1] | in[2] | in[3];
 680     zero = (((int64_t) (zero) - 1) >> 63) & 1;
 681     two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
 682         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
 683     two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
 684     two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
 685         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
 686     two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
 687     return (zero | two224m96p1 | two225m97p2);
 688 }
 689
 690 static int felem_is_zero_int(const void *in)
 691 {
 692     return (int)(felem_is_zero(in) & ((limb) 1));
 693 }
 694
 695 /* Invert a field element */
 696 /* Computation chain copied from djb's code */
 697 static void felem_inv(felem out, const felem in)
 698 {
 699     felem ftmp, ftmp2, ftmp3, ftmp4;
 700     widefelem tmp;
 701     unsigned i;
 702
 703     felem_square(tmp, in);
 704     felem_reduce(ftmp, tmp);    /* 2 */
 705     felem_mul(tmp, in, ftmp);
 706     felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
 707     felem_square(tmp, ftmp);
 708     felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
 709     felem_mul(tmp, in, ftmp);
 710     felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
 711     felem_square(tmp, ftmp);
 712     felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
 713     felem_square(tmp, ftmp2);
 714     felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
 715     felem_square(tmp, ftmp2);
 716     felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
 717     felem_mul(tmp, ftmp2, ftmp);
 718     felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
 719     felem_square(tmp, ftmp);
 720     felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
 721     for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
 722         felem_square(tmp, ftmp2);
 723         felem_reduce(ftmp2, tmp);
 724     }
 725     felem_mul(tmp, ftmp2, ftmp);
 726     felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
 727     felem_square(tmp, ftmp2);
 728     felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
 729     for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
 730         felem_square(tmp, ftmp3);
 731         felem_reduce(ftmp3, tmp);
 732     }
 733     felem_mul(tmp, ftmp3, ftmp2);
 734     felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
 735     felem_square(tmp, ftmp2);
 736     felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
 737     for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
 738         felem_square(tmp, ftmp3);
 739         felem_reduce(ftmp3, tmp);
 740     }
 741     felem_mul(tmp, ftmp3, ftmp2);
 742     felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
 743     felem_square(tmp, ftmp3);
 744     felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
 745     for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
 746         felem_square(tmp, ftmp4);
 747         felem_reduce(ftmp4, tmp);
 748     }
 749     felem_mul(tmp, ftmp3, ftmp4);
 750     felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
 751     felem_square(tmp, ftmp3);
 752     felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
 753     for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
 754         felem_square(tmp, ftmp4);
 755         felem_reduce(ftmp4, tmp);
 756     }
 757     felem_mul(tmp, ftmp2, ftmp4);
 758     felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
 759     for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
 760         felem_square(tmp, ftmp2);
 761         felem_reduce(ftmp2, tmp);
 762     }
 763     felem_mul(tmp, ftmp2, ftmp);
 764     felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
 765     felem_square(tmp, ftmp);
 766     felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
 767     felem_mul(tmp, ftmp, in);
 768     felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
 769     for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
 770         felem_square(tmp, ftmp);
 771         felem_reduce(ftmp, tmp);
 772     }
 773     felem_mul(tmp, ftmp, ftmp3);
 774     felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
 775 }
 776
 777 /*
 778  * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
 779  * out to itself.
 780  */
 781 static void copy_conditional(felem out, const felem in, limb icopy)
 782 {
 783     unsigned i;
 784     /*
 785      * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
 786      */
 787     const limb copy = -icopy;
 788     for (i = 0; i < 4; ++i) {
 789         const limb tmp = copy & (in[i] ^ out[i]);
 790         out[i] ^= tmp;
 791     }
 792 }
 793
 794 /******************************************************************************/
 795 /*-
 796  *                       ELLIPTIC CURVE POINT OPERATIONS
 797  *
 798  * Points are represented in Jacobian projective coordinates:
 799  * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
 800  * or to the point at infinity if Z == 0.
 801  *
 802  */
 803
 804 /*-
 805  * Double an elliptic curve point:
 806  * (X', Y', Z') = 2 * (X, Y, Z), where
 807  * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
 808  * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
 809  * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
 810  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
 811  * while x_out == y_in is not (maybe this works, but it's not tested).
 812  */
 813 static void
 814 point_double(felem x_out, felem y_out, felem z_out,
 815              const felem x_in, const felem y_in, const felem z_in)
 816 {
 817     widefelem tmp, tmp2;
 818     felem delta, gamma, beta, alpha, ftmp, ftmp2;
 819
 820     felem_assign(ftmp, x_in);
 821     felem_assign(ftmp2, x_in);
 822
 823     /* delta = z^2 */
 824     felem_square(tmp, z_in);
 825     felem_reduce(delta, tmp);
 826
 827     /* gamma = y^2 */
 828     felem_square(tmp, y_in);
 829     felem_reduce(gamma, tmp);
 830
 831     /* beta = x*gamma */
 832     felem_mul(tmp, x_in, gamma);
 833     felem_reduce(beta, tmp);
 834
 835     /* alpha = 3*(x-delta)*(x+delta) */
 836     felem_diff(ftmp, delta);
 837     /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
 838     felem_sum(ftmp2, delta);
 839     /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
 840     felem_scalar(ftmp2, 3);
 841     /* ftmp2[i] < 3 * 2^58 < 2^60 */
 842     felem_mul(tmp, ftmp, ftmp2);
 843     /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
 844     felem_reduce(alpha, tmp);
 845
 846     /* x' = alpha^2 - 8*beta */
 847     felem_square(tmp, alpha);
 848     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 849     felem_assign(ftmp, beta);
 850     felem_scalar(ftmp, 8);
 851     /* ftmp[i] < 8 * 2^57 = 2^60 */
 852     felem_diff_128_64(tmp, ftmp);
 853     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 854     felem_reduce(x_out, tmp);
 855
 856     /* z' = (y + z)^2 - gamma - delta */
 857     felem_sum(delta, gamma);
 858     /* delta[i] < 2^57 + 2^57 = 2^58 */
 859     felem_assign(ftmp, y_in);
 860     felem_sum(ftmp, z_in);
 861     /* ftmp[i] < 2^57 + 2^57 = 2^58 */
 862     felem_square(tmp, ftmp);
 863     /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
 864     felem_diff_128_64(tmp, delta);
 865     /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
 866     felem_reduce(z_out, tmp);
 867
 868     /* y' = alpha*(4*beta - x') - 8*gamma^2 */
 869     felem_scalar(beta, 4);
 870     /* beta[i] < 4 * 2^57 = 2^59 */
 871     felem_diff(beta, x_out);
 872     /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
 873     felem_mul(tmp, alpha, beta);
 874     /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
 875     felem_square(tmp2, gamma);
 876     /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
 877     widefelem_scalar(tmp2, 8);
 878     /* tmp2[i] < 8 * 2^116 = 2^119 */
 879     widefelem_diff(tmp, tmp2);
 880     /* tmp[i] < 2^119 + 2^120 < 2^121 */
 881     felem_reduce(y_out, tmp);
 882 }
 883
 884 /*-
 885  * Add two elliptic curve points:
 886  * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
 887  * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
 888  * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
 889  * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
 890  *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
 891  * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
 892  *
 893  * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
 894  */
 895
 896 /*
 897  * This function is not entirely constant-time: it includes a branch for
 898  * checking whether the two input points are equal, (while not equal to the
 899  * point at infinity). This case never happens during single point
 900  * multiplication, so there is no timing leak for ECDH or ECDSA signing.
 901  */
 902 static void point_add(felem x3, felem y3, felem z3,
 903                       const felem x1, const felem y1, const felem z1,
 904                       const int mixed, const felem x2, const felem y2,
 905                       const felem z2)
 906 {
 907     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
 908     widefelem tmp, tmp2;
 909     limb z1_is_zero, z2_is_zero, x_equal, y_equal;
 910
 911     if (!mixed) {
 912         /* ftmp2 = z2^2 */
 913         felem_square(tmp, z2);
 914         felem_reduce(ftmp2, tmp);
 915
 916         /* ftmp4 = z2^3 */
 917         felem_mul(tmp, ftmp2, z2);
 918         felem_reduce(ftmp4, tmp);
 919
 920         /* ftmp4 = z2^3*y1 */
 921         felem_mul(tmp2, ftmp4, y1);
 922         felem_reduce(ftmp4, tmp2);
 923
 924         /* ftmp2 = z2^2*x1 */
 925         felem_mul(tmp2, ftmp2, x1);
 926         felem_reduce(ftmp2, tmp2);
 927     } else {
 928         /*
 929          * We'll assume z2 = 1 (special case z2 = 0 is handled later)
 930          */
 931
 932         /* ftmp4 = z2^3*y1 */
 933         felem_assign(ftmp4, y1);
 934
 935         /* ftmp2 = z2^2*x1 */
 936         felem_assign(ftmp2, x1);
 937     }
 938
 939     /* ftmp = z1^2 */
 940     felem_square(tmp, z1);
 941     felem_reduce(ftmp, tmp);
 942
 943     /* ftmp3 = z1^3 */
 944     felem_mul(tmp, ftmp, z1);
 945     felem_reduce(ftmp3, tmp);
 946
 947     /* tmp = z1^3*y2 */
 948     felem_mul(tmp, ftmp3, y2);
 949     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 950
 951     /* ftmp3 = z1^3*y2 - z2^3*y1 */
 952     felem_diff_128_64(tmp, ftmp4);
 953     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 954     felem_reduce(ftmp3, tmp);
 955
 956     /* tmp = z1^2*x2 */
 957     felem_mul(tmp, ftmp, x2);
 958     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
 959
 960     /* ftmp = z1^2*x2 - z2^2*x1 */
 961     felem_diff_128_64(tmp, ftmp2);
 962     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
 963     felem_reduce(ftmp, tmp);
 964
 965     /*
 966      * the formulae are incorrect if the points are equal so we check for
 967      * this and do doubling if this happens
 968      */
 969     x_equal = felem_is_zero(ftmp);
 970     y_equal = felem_is_zero(ftmp3);
 971     z1_is_zero = felem_is_zero(z1);
 972     z2_is_zero = felem_is_zero(z2);
 973     /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
 974     if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
 975         point_double(x3, y3, z3, x1, y1, z1);
 976         return;
 977     }
 978
 979     /* ftmp5 = z1*z2 */
 980     if (!mixed) {
 981         felem_mul(tmp, z1, z2);
 982         felem_reduce(ftmp5, tmp);
 983     } else {
 984         /* special case z2 = 0 is handled later */
 985         felem_assign(ftmp5, z1);
 986     }
 987
 988     /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
 989     felem_mul(tmp, ftmp, ftmp5);
 990     felem_reduce(z_out, tmp);
 991
 992     /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
 993     felem_assign(ftmp5, ftmp);
 994     felem_square(tmp, ftmp);
 995     felem_reduce(ftmp, tmp);
 996
 997     /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
 998     felem_mul(tmp, ftmp, ftmp5);
 999     felem_reduce(ftmp5, tmp);
1000
1001     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1002     felem_mul(tmp, ftmp2, ftmp);
1003     felem_reduce(ftmp2, tmp);
1004
1005     /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1006     felem_mul(tmp, ftmp4, ftmp5);
1007     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1008
1009     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1010     felem_square(tmp2, ftmp3);
1011     /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1012
1013     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1014     felem_diff_128_64(tmp2, ftmp5);
1015     /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1016
1017     /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1018     felem_assign(ftmp5, ftmp2);
1019     felem_scalar(ftmp5, 2);
1020     /* ftmp5[i] < 2 * 2^57 = 2^58 */
1021
1022     /*-
1023      * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1024      *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1025      */
1026     felem_diff_128_64(tmp2, ftmp5);
1027     /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1028     felem_reduce(x_out, tmp2);
1029
1030     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1031     felem_diff(ftmp2, x_out);
1032     /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1033
1034     /*
1035      * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1036      */
1037     felem_mul(tmp2, ftmp3, ftmp2);
1038     /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1039
1040     /*-
1041      * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1042      *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1043      */
1044     widefelem_diff(tmp2, tmp);
1045     /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1046     felem_reduce(y_out, tmp2);
1047
1048     /*
1049      * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1050      * the point at infinity, so we need to check for this separately
1051      */
1052
1053     /*
1054      * if point 1 is at infinity, copy point 2 to output, and vice versa
1055      */
1056     copy_conditional(x_out, x2, z1_is_zero);
1057     copy_conditional(x_out, x1, z2_is_zero);
1058     copy_conditional(y_out, y2, z1_is_zero);
1059     copy_conditional(y_out, y1, z2_is_zero);
1060     copy_conditional(z_out, z2, z1_is_zero);
1061     copy_conditional(z_out, z1, z2_is_zero);
1062     felem_assign(x3, x_out);
1063     felem_assign(y3, y_out);
1064     felem_assign(z3, z_out);
1065 }
1066
1067 /*
1068  * select_point selects the |idx|th point from a precomputation table and
1069  * copies it to out.
1070  * The pre_comp array argument should be size of |size| argument
1071  */
1072 static void select_point(const u64 idx, unsigned int size,
1073                          const felem pre_comp[][3], felem out[3])
1074 {
1075     unsigned i, j;
1076     limb *outlimbs = &out[0][0];
1077
1078     memset(out, 0, sizeof(*out) * 3);
1079     for (i = 0; i < size; i++) {
1080         const limb *inlimbs = &pre_comp[i][0][0];
1081         u64 mask = i ^ idx;
1082         mask |= mask >> 4;
1083         mask |= mask >> 2;
1084         mask |= mask >> 1;
1085         mask &= 1;
1086         mask--;
1087         for (j = 0; j < 4 * 3; j++)
1088             outlimbs[j] |= inlimbs[j] & mask;
1089     }
1090 }
1091
1092 /* get_bit returns the |i|th bit in |in| */
1093 static char get_bit(const felem_bytearray in, unsigned i)
1094 {
1095     if (i >= 224)
1096         return 0;
1097     return (in[i >> 3] >> (i & 7)) & 1;
1098 }
1099
1100 /*
1101  * Interleaved point multiplication using precomputed point multiples: The
1102  * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1103  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1104  * generator, using certain (large) precomputed multiples in g_pre_comp.
1105  * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1106  */
1107 static void batch_mul(felem x_out, felem y_out, felem z_out,
1108                       const felem_bytearray scalars[],
1109                       const unsigned num_points, const u8 *g_scalar,
1110                       const int mixed, const felem pre_comp[][17][3],
1111                       const felem g_pre_comp[2][16][3])
1112 {
1113     int i, skip;
1114     unsigned num;
1115     unsigned gen_mul = (g_scalar != NULL);
1116     felem nq[3], tmp[4];
1117     u64 bits;
1118     u8 sign, digit;
1119
1120     /* set nq to the point at infinity */
1121     memset(nq, 0, sizeof(nq));
1122
1123     /*
1124      * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1125      * of the generator (two in each of the last 28 rounds) and additions of
1126      * other points multiples (every 5th round).
1127      */
1128     skip = 1;                   /* save two point operations in the first
1129                                  * round */
1130     for (i = (num_points ? 220 : 27); i >= 0; --i) {
1131         /* double */
1132         if (!skip)
1133             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1134
1135         /* add multiples of the generator */
1136         if (gen_mul && (i <= 27)) {
1137             /* first, look 28 bits upwards */
1138             bits = get_bit(g_scalar, i + 196) << 3;
1139             bits |= get_bit(g_scalar, i + 140) << 2;
1140             bits |= get_bit(g_scalar, i + 84) << 1;
1141             bits |= get_bit(g_scalar, i + 28);
1142             /* select the point to add, in constant time */
1143             select_point(bits, 16, g_pre_comp[1], tmp);
1144
1145             if (!skip) {
1146                 /* value 1 below is argument for "mixed" */
1147                 point_add(nq[0], nq[1], nq[2],
1148                           nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1149             } else {
1150                 memcpy(nq, tmp, 3 * sizeof(felem));
1151                 skip = 0;
1152             }
1153
1154             /* second, look at the current position */
1155             bits = get_bit(g_scalar, i + 168) << 3;
1156             bits |= get_bit(g_scalar, i + 112) << 2;
1157             bits |= get_bit(g_scalar, i + 56) << 1;
1158             bits |= get_bit(g_scalar, i);
1159             /* select the point to add, in constant time */
1160             select_point(bits, 16, g_pre_comp[0], tmp);
1161             point_add(nq[0], nq[1], nq[2],
1162                       nq[0], nq[1], nq[2],
1163                       1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1164         }
1165
1166         /* do other additions every 5 doublings */
1167         if (num_points && (i % 5 == 0)) {
1168             /* loop over all scalars */
1169             for (num = 0; num < num_points; ++num) {
1170                 bits = get_bit(scalars[num], i + 4) << 5;
1171                 bits |= get_bit(scalars[num], i + 3) << 4;
1172                 bits |= get_bit(scalars[num], i + 2) << 3;
1173                 bits |= get_bit(scalars[num], i + 1) << 2;
1174                 bits |= get_bit(scalars[num], i) << 1;
1175                 bits |= get_bit(scalars[num], i - 1);
1176                 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1177
1178                 /* select the point to add or subtract */
1179                 select_point(digit, 17, pre_comp[num], tmp);
1180                 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1181                                             * point */
1182                 copy_conditional(tmp[1], tmp[3], sign);
1183
1184                 if (!skip) {
1185                     point_add(nq[0], nq[1], nq[2],
1186                               nq[0], nq[1], nq[2],
1187                               mixed, tmp[0], tmp[1], tmp[2]);
1188                 } else {
1189                     memcpy(nq, tmp, 3 * sizeof(felem));
1190                     skip = 0;
1191                 }
1192             }
1193         }
1194     }
1195     felem_assign(x_out, nq[0]);
1196     felem_assign(y_out, nq[1]);
1197     felem_assign(z_out, nq[2]);
1198 }
1199
1200 /******************************************************************************/
1201 /*
1202  * FUNCTIONS TO MANAGE PRECOMPUTATION
1203  */
1204
1205 static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1206 {
1207     NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1208
1209     if (!ret) {
1210         ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1211         return ret;
1212     }
1213
1214     ret->references = 1;
1215
1216     ret->lock = CRYPTO_THREAD_lock_new();
1217     if (ret->lock == NULL) {
1218         ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1219         OPENSSL_free(ret);
1220         return NULL;
1221     }
1222     return ret;
1223 }
1224
1225 NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1226 {
1227     int i;
1228     if (p != NULL)
1229         CRYPTO_UP_REF(&p->references, &i, p->lock);
1230     return p;
1231 }
1232
1233 void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1234 {
1235     int i;
1236
1237     if (p == NULL)
1238         return;
1239
1240     CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1241     REF_PRINT_COUNT("EC_nistp224", x);
1242     if (i > 0)
1243         return;
1244     REF_ASSERT_ISNT(i < 0);
1245
1246     CRYPTO_THREAD_lock_free(p->lock);
1247     OPENSSL_free(p);
1248 }
1249
1250 /******************************************************************************/
1251 /*
1252  * OPENSSL EC_METHOD FUNCTIONS
1253  */
1254
1255 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1256 {
1257     int ret;
1258     ret = ec_GFp_simple_group_init(group);
1259     group->a_is_minus3 = 1;
1260     return ret;
1261 }
1262
1263 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1264                                     const BIGNUM *a, const BIGNUM *b,
1265                                     BN_CTX *ctx)
1266 {
1267     int ret = 0;
1268     BN_CTX *new_ctx = NULL;
1269     BIGNUM *curve_p, *curve_a, *curve_b;
1270
1271     if (ctx == NULL)
1272         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1273             return 0;
1274     BN_CTX_start(ctx);
1275     curve_p = BN_CTX_get(ctx);
1276     curve_a = BN_CTX_get(ctx);
1277     curve_b = BN_CTX_get(ctx);
1278     if (curve_b == NULL)
1279         goto err;
1280     BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1281     BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1282     BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1283     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1284         ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1285               EC_R_WRONG_CURVE_PARAMETERS);
1286         goto err;
1287     }
1288     group->field_mod_func = BN_nist_mod_224;
1289     ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1290  err:
1291     BN_CTX_end(ctx);
1292     BN_CTX_free(new_ctx);
1293     return ret;
1294 }
1295
1296 /*
1297  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1298  * (X/Z^2, Y/Z^3)
1299  */
1300 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1301                                                  const EC_POINT *point,
1302                                                  BIGNUM *x, BIGNUM *y,
1303                                                  BN_CTX *ctx)
1304 {
1305     felem z1, z2, x_in, y_in, x_out, y_out;
1306     widefelem tmp;
1307
1308     if (EC_POINT_is_at_infinity(group, point)) {
1309         ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1310               EC_R_POINT_AT_INFINITY);
1311         return 0;
1312     }
1313     if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1314         (!BN_to_felem(z1, point->Z)))
1315         return 0;
1316     felem_inv(z2, z1);
1317     felem_square(tmp, z2);
1318     felem_reduce(z1, tmp);
1319     felem_mul(tmp, x_in, z1);
1320     felem_reduce(x_in, tmp);
1321     felem_contract(x_out, x_in);
1322     if (x != NULL) {
1323         if (!felem_to_BN(x, x_out)) {
1324             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1325                   ERR_R_BN_LIB);
1326             return 0;
1327         }
1328     }
1329     felem_mul(tmp, z1, z2);
1330     felem_reduce(z1, tmp);
1331     felem_mul(tmp, y_in, z1);
1332     felem_reduce(y_in, tmp);
1333     felem_contract(y_out, y_in);
1334     if (y != NULL) {
1335         if (!felem_to_BN(y, y_out)) {
1336             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1337                   ERR_R_BN_LIB);
1338             return 0;
1339         }
1340     }
1341     return 1;
1342 }
1343
1344 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1345                                felem tmp_felems[ /* num+1 */ ])
1346 {
1347     /*
1348      * Runs in constant time, unless an input is the point at infinity (which
1349      * normally shouldn't happen).
1350      */
1351     ec_GFp_nistp_points_make_affine_internal(num,
1352                                              points,
1353                                              sizeof(felem),
1354                                              tmp_felems,
1355                                              (void (*)(void *))felem_one,
1356                                              felem_is_zero_int,
1357                                              (void (*)(void *, const void *))
1358                                              felem_assign,
1359                                              (void (*)(void *, const void *))
1360                                              felem_square_reduce, (void (*)
1361                                                                    (void *,
1362                                                                     const void
1363                                                                     *,
1364                                                                     const void
1365                                                                     *))
1366                                              felem_mul_reduce,
1367                                              (void (*)(void *, const void *))
1368                                              felem_inv,
1369                                              (void (*)(void *, const void *))
1370                                              felem_contract);
1371 }
1372
1373 /*
1374  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1375  * values Result is stored in r (r can equal one of the inputs).
1376  */
1377 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1378                                const BIGNUM *scalar, size_t num,
1379                                const EC_POINT *points[],
1380                                const BIGNUM *scalars[], BN_CTX *ctx)
1381 {
1382     int ret = 0;
1383     int j;
1384     unsigned i;
1385     int mixed = 0;
1386     BIGNUM *x, *y, *z, *tmp_scalar;
1387     felem_bytearray g_secret;
1388     felem_bytearray *secrets = NULL;
1389     felem (*pre_comp)[17][3] = NULL;
1390     felem *tmp_felems = NULL;
1391     int num_bytes;
1392     int have_pre_comp = 0;
1393     size_t num_points = num;
1394     felem x_in, y_in, z_in, x_out, y_out, z_out;
1395     NISTP224_PRE_COMP *pre = NULL;
1396     const felem(*g_pre_comp)[16][3] = NULL;
1397     EC_POINT *generator = NULL;
1398     const EC_POINT *p = NULL;
1399     const BIGNUM *p_scalar = NULL;
1400
1401     BN_CTX_start(ctx);
1402     x = BN_CTX_get(ctx);
1403     y = BN_CTX_get(ctx);
1404     z = BN_CTX_get(ctx);
1405     tmp_scalar = BN_CTX_get(ctx);
1406     if (tmp_scalar == NULL)
1407         goto err;
1408
1409     if (scalar != NULL) {
1410         pre = group->pre_comp.nistp224;
1411         if (pre)
1412             /* we have precomputation, try to use it */
1413             g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1414         else
1415             /* try to use the standard precomputation */
1416             g_pre_comp = &gmul[0];
1417         generator = EC_POINT_new(group);
1418         if (generator == NULL)
1419             goto err;
1420         /* get the generator from precomputation */
1421         if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1422             !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1423             !felem_to_BN(z, g_pre_comp[0][1][2])) {
1424             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1425             goto err;
1426         }
1427         if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1428                                                       generator, x, y, z,
1429                                                       ctx))
1430             goto err;
1431         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1432             /* precomputation matches generator */
1433             have_pre_comp = 1;
1434         else
1435             /*
1436              * we don't have valid precomputation: treat the generator as a
1437              * random point
1438              */
1439             num_points = num_points + 1;
1440     }
1441
1442     if (num_points > 0) {
1443         if (num_points >= 3) {
1444             /*
1445              * unless we precompute multiples for just one or two points,
1446              * converting those into affine form is time well spent
1447              */
1448             mixed = 1;
1449         }
1450         secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1451         pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1452         if (mixed)
1453             tmp_felems =
1454                 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1455         if ((secrets == NULL) || (pre_comp == NULL)
1456             || (mixed && (tmp_felems == NULL))) {
1457             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1458             goto err;
1459         }
1460
1461         /*
1462          * we treat NULL scalars as 0, and NULL points as points at infinity,
1463          * i.e., they contribute nothing to the linear combination
1464          */
1465         for (i = 0; i < num_points; ++i) {
1466             if (i == num) {
1467                 /* the generator */
1468                 p = EC_GROUP_get0_generator(group);
1469                 p_scalar = scalar;
1470             } else {
1471                 /* the i^th point */
1472                 p = points[i];
1473                 p_scalar = scalars[i];
1474             }
1475             if ((p_scalar != NULL) && (p != NULL)) {
1476                 /* reduce scalar to 0 <= scalar < 2^224 */
1477                 if ((BN_num_bits(p_scalar) > 224)
1478                     || (BN_is_negative(p_scalar))) {
1479                     /*
1480                      * this is an unusual input, and we don't guarantee
1481                      * constant-timeness
1482                      */
1483                     if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1484                         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1485                         goto err;
1486                     }
1487                     num_bytes = BN_bn2lebinpad(tmp_scalar,
1488                                                secrets[i], sizeof(secrets[i]));
1489                 } else {
1490                     num_bytes = BN_bn2lebinpad(p_scalar,
1491                                                secrets[i], sizeof(secrets[i]));
1492                 }
1493                 if (num_bytes < 0) {
1494                     ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1495                     goto err;
1496                 }
1497                 /* precompute multiples */
1498                 if ((!BN_to_felem(x_out, p->X)) ||
1499                     (!BN_to_felem(y_out, p->Y)) ||
1500                     (!BN_to_felem(z_out, p->Z)))
1501                     goto err;
1502                 felem_assign(pre_comp[i][1][0], x_out);
1503                 felem_assign(pre_comp[i][1][1], y_out);
1504                 felem_assign(pre_comp[i][1][2], z_out);
1505                 for (j = 2; j <= 16; ++j) {
1506                     if (j & 1) {
1507                         point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1508                                   pre_comp[i][j][2], pre_comp[i][1][0],
1509                                   pre_comp[i][1][1], pre_comp[i][1][2], 0,
1510                                   pre_comp[i][j - 1][0],
1511                                   pre_comp[i][j - 1][1],
1512                                   pre_comp[i][j - 1][2]);
1513                     } else {
1514                         point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1515                                      pre_comp[i][j][2], pre_comp[i][j / 2][0],
1516                                      pre_comp[i][j / 2][1],
1517                                      pre_comp[i][j / 2][2]);
1518                     }
1519                 }
1520             }
1521         }
1522         if (mixed)
1523             make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1524     }
1525
1526     /* the scalar for the generator */
1527     if ((scalar != NULL) && (have_pre_comp)) {
1528         memset(g_secret, 0, sizeof(g_secret));
1529         /* reduce scalar to 0 <= scalar < 2^224 */
1530         if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1531             /*
1532              * this is an unusual input, and we don't guarantee
1533              * constant-timeness
1534              */
1535             if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1536                 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1537                 goto err;
1538             }
1539             num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1540         } else {
1541             num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1542         }
1543         /* do the multiplication with generator precomputation */
1544         batch_mul(x_out, y_out, z_out,
1545                   (const felem_bytearray(*))secrets, num_points,
1546                   g_secret,
1547                   mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1548     } else {
1549         /* do the multiplication without generator precomputation */
1550         batch_mul(x_out, y_out, z_out,
1551                   (const felem_bytearray(*))secrets, num_points,
1552                   NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1553     }
1554     /* reduce the output to its unique minimal representation */
1555     felem_contract(x_in, x_out);
1556     felem_contract(y_in, y_out);
1557     felem_contract(z_in, z_out);
1558     if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1559         (!felem_to_BN(z, z_in))) {
1560         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1561         goto err;
1562     }
1563     ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1564
1565  err:
1566     BN_CTX_end(ctx);
1567     EC_POINT_free(generator);
1568     OPENSSL_free(secrets);
1569     OPENSSL_free(pre_comp);
1570     OPENSSL_free(tmp_felems);
1571     return ret;
1572 }
1573
1574 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1575 {
1576     int ret = 0;
1577     NISTP224_PRE_COMP *pre = NULL;
1578     int i, j;
1579     BN_CTX *new_ctx = NULL;
1580     BIGNUM *x, *y;
1581     EC_POINT *generator = NULL;
1582     felem tmp_felems[32];
1583
1584     /* throw away old precomputation */
1585     EC_pre_comp_free(group);
1586     if (ctx == NULL)
1587         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1588             return 0;
1589     BN_CTX_start(ctx);
1590     x = BN_CTX_get(ctx);
1591     y = BN_CTX_get(ctx);
1592     if (y == NULL)
1593         goto err;
1594     /* get the generator */
1595     if (group->generator == NULL)
1596         goto err;
1597     generator = EC_POINT_new(group);
1598     if (generator == NULL)
1599         goto err;
1600     BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1601     BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1602     if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1603         goto err;
1604     if ((pre = nistp224_pre_comp_new()) == NULL)
1605         goto err;
1606     /*
1607      * if the generator is the standard one, use built-in precomputation
1608      */
1609     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1610         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1611         goto done;
1612     }
1613     if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1614         (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1615         (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1616         goto err;
1617     /*
1618      * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1619      * 2^140*G, 2^196*G for the second one
1620      */
1621     for (i = 1; i <= 8; i <<= 1) {
1622         point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1623                      pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1624                      pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1625         for (j = 0; j < 27; ++j) {
1626             point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1627                          pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1628                          pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1629         }
1630         if (i == 8)
1631             break;
1632         point_double(pre->g_pre_comp[0][2 * i][0],
1633                      pre->g_pre_comp[0][2 * i][1],
1634                      pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1635                      pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1636         for (j = 0; j < 27; ++j) {
1637             point_double(pre->g_pre_comp[0][2 * i][0],
1638                          pre->g_pre_comp[0][2 * i][1],
1639                          pre->g_pre_comp[0][2 * i][2],
1640                          pre->g_pre_comp[0][2 * i][0],
1641                          pre->g_pre_comp[0][2 * i][1],
1642                          pre->g_pre_comp[0][2 * i][2]);
1643         }
1644     }
1645     for (i = 0; i < 2; i++) {
1646         /* g_pre_comp[i][0] is the point at infinity */
1647         memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1648         /* the remaining multiples */
1649         /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1650         point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1651                   pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1652                   pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1653                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1654                   pre->g_pre_comp[i][2][2]);
1655         /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1656         point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1657                   pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1658                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1659                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1660                   pre->g_pre_comp[i][2][2]);
1661         /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1662         point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1663                   pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1664                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1665                   0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1666                   pre->g_pre_comp[i][4][2]);
1667         /*
1668          * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1669          */
1670         point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1671                   pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1672                   pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1673                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1674                   pre->g_pre_comp[i][2][2]);
1675         for (j = 1; j < 8; ++j) {
1676             /* odd multiples: add G resp. 2^28*G */
1677             point_add(pre->g_pre_comp[i][2 * j + 1][0],
1678                       pre->g_pre_comp[i][2 * j + 1][1],
1679                       pre->g_pre_comp[i][2 * j + 1][2],
1680                       pre->g_pre_comp[i][2 * j][0],
1681                       pre->g_pre_comp[i][2 * j][1],
1682                       pre->g_pre_comp[i][2 * j][2], 0,
1683                       pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1684                       pre->g_pre_comp[i][1][2]);
1685         }
1686     }
1687     make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1688
1689  done:
1690     SETPRECOMP(group, nistp224, pre);
1691     pre = NULL;
1692     ret = 1;
1693  err:
1694     BN_CTX_end(ctx);
1695     EC_POINT_free(generator);
1696     BN_CTX_free(new_ctx);
1697     EC_nistp224_pre_comp_free(pre);
1698     return ret;
1699 }
1700
1701 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1702 {
1703     return HAVEPRECOMP(group, nistp224);
1704 }
1705
1706 #endif